Renovate display of non-ASCII messages on Windows.

author Noah Misch <noah@leadboat.com>

Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)

committer Noah Misch <noah@leadboat.com>

Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)
author Noah Misch <noah@leadboat.com>
Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)
committer Noah Misch <noah@leadboat.com>
Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)
diff --git a/src/backend/main/main.c b/src/backend/main/main.c

index 8ea6c1f387405a23059a021f89602d827528dfde..d71885dba9d50f8ad0533f2697db48047851ea20 100644 (file)
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -265,6 +265,10 @@ startup_hacks(const char *progname)
  /*
   * Help display should match the options accepted by PostmasterMain()
   * and PostgresMain().
+ *
+ * XXX On Windows, non-ASCII localizations of these messages only display
+ * correctly if the console output code page covers the necessary characters.
+ * Messages emitted in write_console() do not exhibit this problem.
   */
  static void
  help(const char *progname)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c

index 7081b00500bec9230c41823ca992f99d7ebba00b..3d85e297d22481de8f3c1a2cb19aadee866d7251 100644 (file)
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -131,14 +131,16 @@ static char *IsoLocaleName(const char *);     /* MSVC specific */
  /*
   * pg_perm_setlocale
   *
- * This is identical to the libc function setlocale(), with the addition
- * that if the operation is successful, the corresponding LC_XXX environment
- * variable is set to match.  By setting the environment variable, we ensure
- * that any subsequent use of setlocale(..., "") will preserve the settings
- * made through this routine.  Of course, LC_ALL must also be unset to fully
- * ensure that, but that has to be done elsewhere after all the individual
- * LC_XXX variables have been set correctly.  (Thank you Perl for making this
- * kluge necessary.)
+ * This wraps the libc function setlocale(), with two additions.  First, when
+ * changing LC_CTYPE, update gettext's encoding for the current message
+ * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
+ * not on Windows.  Second, if the operation is successful, the corresponding
+ * LC_XXX environment variable is set to match.  By setting the environment
+ * variable, we ensure that any subsequent use of setlocale(..., "") will
+ * preserve the settings made through this routine.  Of course, LC_ALL must
+ * also be unset to fully ensure that, but that has to be done elsewhere after
+ * all the individual LC_XXX variables have been set correctly.  (Thank you
+ * Perl for making this kluge necessary.)
   */
  char *
  pg_perm_setlocale(int category, const char *locale)
@@ -172,6 +174,22 @@ pg_perm_setlocale(int category, const char *locale)
     if (result == NULL)
         return result;          /* fall out immediately on failure */
  
+   /*
+    * Use the right encoding in translated messages.  Under ENABLE_NLS, let
+    * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
+    * format strings are ASCII, but database-encoding strings may enter the
+    * message via %s.  This makes the overall message encoding equal to the
+    * database encoding.
+    */
+   if (category == LC_CTYPE)
+   {
+#ifdef ENABLE_NLS
+       SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
+#else
+       SetMessageEncoding(GetDatabaseEncoding());
+#endif
+   }
+
     switch (category)
     {
         case LC_COLLATE:
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c

index 7f03f419dead8f7f325a8e24a345d23fb1667192..706c01eca55f37e6c8f0370e5162f00b512fb2a4 100644 (file)
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -1813,6 +1813,22 @@ write_syslog(int level, const char *line)
  #endif   /* HAVE_SYSLOG */
  
  #ifdef WIN32
+/*
+ * Get the PostgreSQL equivalent of the Windows ANSI code page.  "ANSI" system
+ * interfaces (e.g. CreateFileA()) expect string arguments in this encoding.
+ * Every process in a given system will find the same value at all times.
+ *
+ * The result of pg_codepage_to_encoding(GetACP()) is computed on the first
+ * call and cached in a function-local static; -2 marks "not computed yet".
+ * That sentinel is distinct from the -1 that pg_codepage_to_encoding()
+ * returns when it finds no match, so a failed lookup is cached as well and
+ * is not retried (nor is its warning repeated) on later calls.
+ */
+static int
+GetACPEncoding(void)
+{
+   static int  encoding = -2;
+
+   if (encoding == -2)
+       encoding = pg_codepage_to_encoding(GetACP());
+
+   return encoding;
+}
+
  /*
   * Write a message line to the windows event log
   */
@@ -1858,16 +1874,18 @@ write_eventlog(int level, const char *line, int len)
     }
  
     /*
-    * Convert message to UTF16 text and write it with ReportEventW, but
-    * fall-back into ReportEventA if conversion failed.
+    * If message character encoding matches the encoding expected by
+    * ReportEventA(), call it to avoid the hazards of conversion.  Otherwise,
+    * try to convert the message to UTF16 and write it with ReportEventW().
+    * Fall back on ReportEventA() if conversion failed.
      *
      * Also verify that we are not on our way into error recursion trouble due
-    * to error messages thrown deep inside pgwin32_toUTF16().
+    * to error messages thrown deep inside pgwin32_message_to_UTF16().
      */
-   if (GetDatabaseEncoding() != GetPlatformEncoding() &&
-       !in_error_recursion_trouble())
+   if (!in_error_recursion_trouble() &&
+       GetMessageEncoding() != GetACPEncoding())
     {
-       utf16 = pgwin32_toUTF16(line, len, NULL);
+       utf16 = pgwin32_message_to_UTF16(line, len, NULL);
         if (utf16)
         {
             ReportEventW(evtHandle,
@@ -1879,6 +1897,7 @@ write_eventlog(int level, const char *line, int len)
                          0,
                          (LPCWSTR *) &utf16,
                          NULL);
+           /* XXX Try ReportEventA() when ReportEventW() fails? */
  
             pfree(utf16);
             return;
@@ -1904,22 +1923,30 @@ write_console(const char *line, int len)
  #ifdef WIN32
  
     /*
-    * WriteConsoleW() will fail if stdout is redirected, so just fall through
+    * Try to convert the message to UTF16 and write it with WriteConsoleW().
+    * Fall back on write() if anything fails.
+    *
+    * In contrast to write_eventlog(), don't skip straight to write() based
+    * on the applicable encodings.  Unlike WriteConsoleW(), write() depends
+    * on the suitability of the console output code page.  Since we put
+    * stderr into binary mode in SubPostmasterMain(), write() skips the
+    * necessary translation anyway.
+    *
+    * WriteConsoleW() will fail if stderr is redirected, so just fall through
      * to writing unconverted to the logfile in this case.
      *
      * Since we palloc the structure required for conversion, also fall
      * through to writing unconverted if we have not yet set up
      * CurrentMemoryContext.
      */
-   if (GetDatabaseEncoding() != GetPlatformEncoding() &&
-       !in_error_recursion_trouble() &&
+   if (!in_error_recursion_trouble() &&
         !redirection_done &&
         CurrentMemoryContext != NULL)
     {
         WCHAR      *utf16;
         int         utf16len;
  
-       utf16 = pgwin32_toUTF16(line, len, &utf16len);
+       utf16 = pgwin32_message_to_UTF16(line, len, &utf16len);
         if (utf16 != NULL)
         {
             HANDLE      stdHandle;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c

index e0abff1145a3b8b241a33556dd396c9be6911d40..e0ea2e9ecfcf4aa2a28399677d51fa68738ca751 100644 (file)
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -357,11 +357,6 @@ CheckMyDatabase(const char *name, bool am_superuser)
     SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE);
     SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE);
  
-   /* Use the right encoding in translated messages */
-#ifdef ENABLE_NLS
-   pg_bind_textdomain_codeset(textdomain(NULL));
-#endif
-
     ReleaseSysCache(tup);
  }
  
diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c

index 9a05e573ffac7077a319efd6335fe45f1e4beb3b..772d4a5d056aff11cd947e9514a1521096176353 100644 (file)
--- a/src/backend/utils/mb/encnames.c
+++ b/src/backend/utils/mb/encnames.c
@@ -352,10 +352,13 @@ pg_enc2name pg_enc2name_tbl[] =
  
  /* ----------
   * These are encoding names for gettext.
+ *
+ * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
   * ----------
   */
  pg_enc2gettext pg_enc2gettext_tbl[] =
  {
+   {PG_SQL_ASCII, "US-ASCII"},
     {PG_UTF8, "UTF-8"},
     {PG_LATIN1, "LATIN1"},
     {PG_LATIN2, "LATIN2"},
@@ -389,6 +392,13 @@ pg_enc2gettext pg_enc2gettext_tbl[] =
     {PG_EUC_KR, "EUC-KR"},
     {PG_EUC_TW, "EUC-TW"},
     {PG_EUC_JIS_2004, "EUC-JP"},
+   {PG_SJIS, "SHIFT-JIS"},
+   {PG_BIG5, "BIG5"},
+   {PG_GBK, "GBK"},
+   {PG_UHC, "UHC"},
+   {PG_GB18030, "GB18030"},
+   {PG_JOHAB, "JOHAB"},
+   {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
     {0, NULL}
  };
  
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c

index 4582219af73fc0c995051fa65e0c9a3029ed0285..6d1cd8e87590248f058092ee93f6e7fc5c0f4c09 100644 (file)
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -53,11 +53,11 @@ static FmgrInfo *ToServerConvProc = NULL;
  static FmgrInfo *ToClientConvProc = NULL;
  
  /*
- * These variables track the currently selected FE and BE encodings.
+ * These variables track the currently selected encodings.
   */
  static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
  static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
-static pg_enc2name *PlatformEncoding = NULL;
+static pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
  
  /*
   * During backend startup we can't set client encoding because we (a)
@@ -881,46 +881,102 @@ SetDatabaseEncoding(int encoding)
     Assert(DatabaseEncoding->encoding == encoding);
  }
  
-/*
- * Bind gettext to the codeset equivalent with the database encoding.
- */
  void
-pg_bind_textdomain_codeset(const char *domainname)
+SetMessageEncoding(int encoding)
  {
-#if defined(ENABLE_NLS)
-   int         encoding = GetDatabaseEncoding();
-   int         i;
+   /*
+    * Make "encoding" the current message encoding by pointing
+    * MessageEncoding at its pg_enc2name_tbl entry.
+    *
+    * Some calls happen before we can elog()!  (Presumably that is why
+    * validity is checked with Assert() alone.)
+    */
+   Assert(PG_VALID_ENCODING(encoding));
  
-   /*
-    * gettext() uses the codeset specified by LC_CTYPE by default, so if that
-    * matches the database encoding we don't need to do anything. In CREATE
-    * DATABASE, we enforce or trust that the locale's codeset matches
-    * database encoding, except for the C locale. In C locale, we bind
-    * gettext() explicitly to the right codeset.
-    *
-    * On Windows, though, gettext() tends to get confused so we always bind
-    * it.
-    */
-#ifndef WIN32
-   const char *ctype = setlocale(LC_CTYPE, NULL);
+   MessageEncoding = &pg_enc2name_tbl[encoding];
+   Assert(MessageEncoding->encoding == encoding);
+}
  
-   if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0)
-       return;
-#endif
+#ifdef ENABLE_NLS
+/*
+ * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
+ * codeset.  Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
+ * fail for gettext-internal causes like out-of-memory.
+ *
+ * Returns true if the bind succeeded.  Returns false if "encoding" has no
+ * entry in pg_enc2gettext_tbl (silently) or if bind_textdomain_codeset()
+ * itself returned NULL (after reporting the failure).  The report goes
+ * through elog(LOG) once CurrentMemoryContext is set up, and through
+ * write_stderr() before that.
+ */
+static bool
+raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
+{
+   bool        elog_ok = (CurrentMemoryContext != NULL);
+   int         i;
  
     for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
     {
         if (pg_enc2gettext_tbl[i].encoding == encoding)
         {
             if (bind_textdomain_codeset(domainname,
-                                       pg_enc2gettext_tbl[i].name) == NULL)
+                                       pg_enc2gettext_tbl[i].name) != NULL)
+               return true;
+
+           if (elog_ok)
                 elog(LOG, "bind_textdomain_codeset failed");
+           else
+               /* write_stderr() is printf-like: supply the newline here */
+               write_stderr("bind_textdomain_codeset failed\n");
+
             break;
         }
     }
+
+   return false;
+}
+
+/*
+ * Bind a gettext message domain to the codeset corresponding to the database
+ * encoding.  For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
+ * Return the MessageEncoding implied by the new settings.
+ *
+ * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
+ * When that matches the database encoding, we don't need to do anything.  In
+ * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
+ * database encoding, except for the C locale.  (On Windows, we also permit a
+ * discrepancy under the UTF8 encoding.)  For the C locale, explicitly bind
+ * gettext to the right codeset.
+ *
+ * On Windows, gettext defaults to the Windows ANSI code page.  This is a
+ * convenient departure for software that passes the strings to Windows ANSI
+ * APIs, but we don't do that.  Compel gettext to use database encoding or,
+ * failing that, the LC_CTYPE encoding as it would on other platforms.
+ *
+ * This function is called before elog() and palloc() are usable.
+ *
+ * Control flow: on non-Windows builds the explicit bind to the database
+ * encoding is attempted only when LC_CTYPE is "C" or "POSIX" (the unbraced
+ * "if" inside #ifndef WIN32 governs the statement that follows the #endif);
+ * on Windows it is always attempted.  When that bind succeeds, the database
+ * encoding is returned.  When it is skipped or fails, the result is the
+ * encoding reported by pg_get_encoding_from_locale(NULL, ...), or
+ * PG_SQL_ASCII if that call returns a negative value.  Only Windows builds
+ * additionally bind gettext to that encoding; if that bind fails there, the
+ * current GetMessageEncoding() value is returned unchanged.
+ */
+int
+pg_bind_textdomain_codeset(const char *domainname)
+{
+   bool        elog_ok = (CurrentMemoryContext != NULL);
+   int         encoding = GetDatabaseEncoding();
+   int         new_msgenc;
+
+#ifndef WIN32
+   const char *ctype = setlocale(LC_CTYPE, NULL);
+
+   if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
  #endif
+       if (encoding != PG_SQL_ASCII &&
+           raw_pg_bind_textdomain_codeset(domainname, encoding))
+           return encoding;
+
+   new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
+   if (new_msgenc < 0)
+       new_msgenc = PG_SQL_ASCII;
+
+#ifdef WIN32
+   if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
+       /* On failure, the old message encoding remains valid. */
+       return GetMessageEncoding();
+#endif
+
+   return new_msgenc;
  }
+#endif
  
+/*
+ * The database encoding, also called the server encoding, represents the
+ * encoding of data stored in text-like data types.  Affected types include
+ * cstring, text, varchar, name, xml, and json.
+ */
  int
  GetDatabaseEncoding(void)
  {
@@ -949,19 +1005,17 @@ pg_client_encoding(PG_FUNCTION_ARGS)
     return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
  }
  
+/*
+ * gettext() returns messages in this encoding.  This often matches the
+ * database encoding, but it differs for SQL_ASCII databases, for processes
+ * not attached to a database, and under a database encoding lacking iconv
+ * support (MULE_INTERNAL).
+ *
+ * The value returned is the encoding field of the pg_enc2name_tbl entry that
+ * MessageEncoding points at, i.e. whatever the most recent
+ * SetMessageEncoding() call stored; MessageEncoding is statically
+ * initialized to the PG_SQL_ASCII entry, so that is the answer before any
+ * such call.
+ */
  int
-GetPlatformEncoding(void)
+GetMessageEncoding(void)
  {
-   if (PlatformEncoding == NULL)
-   {
-       /* try to determine encoding of server's environment locale */
-       int         encoding = pg_get_encoding_from_locale("", true);
-
-       if (encoding < 0)
-           encoding = PG_SQL_ASCII;
-       PlatformEncoding = &pg_enc2name_tbl[encoding];
-   }
-   return PlatformEncoding->encoding;
+   Assert(MessageEncoding);
+   return MessageEncoding->encoding;
  }
  
  #ifdef WIN32
@@ -971,13 +1025,13 @@ GetPlatformEncoding(void)
   * is also passed to utf16len if not null. Returns NULL iff failed.
   */
  WCHAR *
-pgwin32_toUTF16(const char *str, int len, int *utf16len)
+pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
  {
     WCHAR      *utf16;
     int         dstlen;
     UINT        codepage;
  
-   codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage;
+   codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
  
     /*
      * Use MultiByteToWideChar directly if there is a corresponding codepage,
@@ -994,7 +1048,7 @@ pgwin32_toUTF16(const char *str, int len, int *utf16len)
         char       *utf8;
  
         utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
-                                       len, GetDatabaseEncoding(), PG_UTF8);
+                                        len, GetMessageEncoding(), PG_UTF8);
         if (utf8 != str)
             len = strlen(utf8);
  
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h

index 725865595a7e6feb856e606094eb6eaf229add4a..d255c64bc1a44f689355d0e461e92aa55528a1c9 100644 (file)
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -481,8 +481,12 @@ extern const char *pg_get_client_encoding_name(void);
  extern void SetDatabaseEncoding(int encoding);
  extern int GetDatabaseEncoding(void);
  extern const char *GetDatabaseEncodingName(void);
-extern int GetPlatformEncoding(void);
-extern void pg_bind_textdomain_codeset(const char *domainname);
+extern void SetMessageEncoding(int encoding);
+extern int GetMessageEncoding(void);
+
+#ifdef ENABLE_NLS
+extern int pg_bind_textdomain_codeset(const char *domainname);
+#endif
  
  extern int pg_valid_client_encoding(const char *name);
  extern int pg_valid_server_encoding(const char *name);
@@ -542,7 +546,7 @@ extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
  extern bool pg_utf8_islegal(const unsigned char *source, int length);
  
  #ifdef WIN32
-extern WCHAR *pgwin32_toUTF16(const char *str, int len, int *utf16len);
+extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
  #endif
  
  #endif   /* PG_WCHAR_H */
diff --git a/src/include/port.h b/src/include/port.h

index 5eda5f0af55e2198b864faf31905990f328fd051..5ef4b0a0b11863d96213e4379ee958109691cb98 100644 (file)
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -452,6 +452,10 @@ extern void qsort_arg(void *base, size_t nel, size_t elsize,
  /* port/chklocale.c */
  extern int pg_get_encoding_from_locale(const char *ctype, bool write_message);
  
+#if defined(WIN32) && !defined(FRONTEND)
+extern int pg_codepage_to_encoding(UINT cp);
+#endif
+
  /* port/inet_net_ntop.c */
  extern char *inet_net_ntop(int af, const void *src, int bits,
               char *dst, size_t size);
diff --git a/src/port/chklocale.c b/src/port/chklocale.c

index 9e889383f26ade91f1c92e0491adff51c592a676..8b8862ffb29a21936dd4fa9f1772485f649f3281 100644 (file)
--- a/src/port/chklocale.c
+++ b/src/port/chklocale.c
@@ -235,6 +235,32 @@ win32_langinfo(const char *ctype)
  
     return r;
  }
+
+#ifndef FRONTEND
+/*
+ * Given a Windows code page identifier, find the corresponding PostgreSQL
+ * encoding.  Issue a warning and return -1 if none found.
+ *
+ * The code page is rendered as "CP<number>" and compared, ignoring case,
+ * against the system_enc_name entries of encoding_match_list; the
+ * pg_enc_code of the first matching entry is returned.
+ */
+int
+pg_codepage_to_encoding(UINT cp)
+{
+   char        sys[16];
+   int         i;
+
+   /* Bounded formatting, so sys[] cannot be overrun whatever UINT's width */
+   snprintf(sys, sizeof(sys), "CP%u", cp);
+
+   /* Check the table */
+   for (i = 0; encoding_match_list[i].system_enc_name; i++)
+       if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
+           return encoding_match_list[i].pg_enc_code;
+
+   ereport(WARNING,
+           (errmsg("could not determine encoding for codeset \"%s\"", sys),
+          errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
+
+   return -1;
+}
+#endif
  #endif   /* WIN32 */
  
  #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
@@ -248,6 +274,9 @@ win32_langinfo(const char *ctype)
   *
   * If the result is PG_SQL_ASCII, callers should treat it as being compatible
   * with any desired encoding.
+ *
+ * If running in the backend and write_message is false, this function must
+ * cope with the possibility that elog() and palloc() are not yet usable.
   */
  int
  pg_get_encoding_from_locale(const char *ctype, bool write_message)
author	Noah Misch <noah@leadboat.com>
	Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)
committer	Noah Misch <noah@leadboat.com>
	Wed, 26 Jun 2013 15:17:33 +0000 (11:17 -0400)
src/backend/main/main.c		patch \| blob \| blame \| history
src/backend/utils/adt/pg_locale.c		patch \| blob \| blame \| history
src/backend/utils/error/elog.c		patch \| blob \| blame \| history
src/backend/utils/init/postinit.c		patch \| blob \| blame \| history
src/backend/utils/mb/encnames.c		patch \| blob \| blame \| history
src/backend/utils/mb/mbutils.c		patch \| blob \| blame \| history
src/include/mb/pg_wchar.h		patch \| blob \| blame \| history
src/include/port.h		patch \| blob \| blame \| history
src/port/chklocale.c		patch \| blob \| blame \| history