/*
* Help display should match the options accepted by PostmasterMain()
* and PostgresMain().
+ *
+ * XXX On Windows, non-ASCII localizations of these messages only display
+ * correctly if the console output code page covers the necessary characters.
+ * Messages emitted in write_console() do not exhibit this problem.
*/
static void
help(const char *progname)
/*
* pg_perm_setlocale
*
- * This is identical to the libc function setlocale(), with the addition
- * that if the operation is successful, the corresponding LC_XXX environment
- * variable is set to match. By setting the environment variable, we ensure
- * that any subsequent use of setlocale(..., "") will preserve the settings
- * made through this routine. Of course, LC_ALL must also be unset to fully
- * ensure that, but that has to be done elsewhere after all the individual
- * LC_XXX variables have been set correctly. (Thank you Perl for making this
- * kluge necessary.)
+ * This wraps the libc function setlocale(), with two additions. First, when
+ * changing LC_CTYPE, update gettext's encoding for the current message
+ * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
+ * not on Windows. Second, if the operation is successful, the corresponding
+ * LC_XXX environment variable is set to match. By setting the environment
+ * variable, we ensure that any subsequent use of setlocale(..., "") will
+ * preserve the settings made through this routine. Of course, LC_ALL must
+ * also be unset to fully ensure that, but that has to be done elsewhere after
+ * all the individual LC_XXX variables have been set correctly. (Thank you
+ * Perl for making this kluge necessary.)
*/
char *
pg_perm_setlocale(int category, const char *locale)
if (result == NULL)
return result; /* fall out immediately on failure */
+ /*
+ * Use the right encoding in translated messages. Under ENABLE_NLS, let
+ * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
+ * format strings are ASCII, but database-encoding strings may enter the
+ * message via %s. This makes the overall message encoding equal to the
+ * database encoding.
+ */
+ if (category == LC_CTYPE)
+ {
+#ifdef ENABLE_NLS
+ SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
+#else
+ SetMessageEncoding(GetDatabaseEncoding());
+#endif
+ }
+
switch (category)
{
case LC_COLLATE:
#endif /* HAVE_SYSLOG */
#ifdef WIN32
+/*
+ * Get the PostgreSQL equivalent of the Windows ANSI code page. "ANSI" system
+ * interfaces (e.g. CreateFileA()) expect string arguments in this encoding.
+ * Every process in a given system will find the same value at all times.
+ *
+ * The result of pg_codepage_to_encoding(GetACP()) is computed on the first
+ * call and cached in a function-local static; -2 is the "not yet computed"
+ * marker. Whatever that first lookup returns is cached, including the -1
+ * that pg_codepage_to_encoding() returns when it finds no match, so the
+ * lookup is not repeated on later calls.
+ */
+static int
+GetACPEncoding(void)
+{
+ static int encoding = -2;
+
+ if (encoding == -2)
+ encoding = pg_codepage_to_encoding(GetACP());
+
+ return encoding;
+}
+
/*
* Write a message line to the windows event log
*/
}
/*
- * Convert message to UTF16 text and write it with ReportEventW, but
- * fall-back into ReportEventA if conversion failed.
+ * If the message character encoding matches the encoding expected by
+ * ReportEventA(), call it to avoid the hazards of conversion. Otherwise,
+ * try to convert the message to UTF16 and write it with ReportEventW().
+ * Fall back on ReportEventA() if the conversion fails.
*
* Also verify that we are not on our way into error recursion trouble due
- * to error messages thrown deep inside pgwin32_toUTF16().
+ * to error messages thrown deep inside pgwin32_message_to_UTF16().
*/
- if (GetDatabaseEncoding() != GetPlatformEncoding() &&
- !in_error_recursion_trouble())
+ if (!in_error_recursion_trouble() &&
+ GetMessageEncoding() != GetACPEncoding())
{
- utf16 = pgwin32_toUTF16(line, len, NULL);
+ utf16 = pgwin32_message_to_UTF16(line, len, NULL);
if (utf16)
{
ReportEventW(evtHandle,
0,
(LPCWSTR *) &utf16,
NULL);
+ /* XXX Try ReportEventA() when ReportEventW() fails? */
pfree(utf16);
return;
#ifdef WIN32
/*
- * WriteConsoleW() will fail if stdout is redirected, so just fall through
+ * Try to convert the message to UTF16 and write it with WriteConsoleW().
+ * Fall back on write() if anything fails.
+ *
+ * In contrast to write_eventlog(), don't skip straight to write() based
+ * on the applicable encodings. Unlike WriteConsoleW(), write() depends
+ * on the suitability of the console output code page. Since we put
+ * stderr into binary mode in SubPostmasterMain(), write() skips the
+ * necessary translation anyway.
+ *
+ * WriteConsoleW() will fail if stderr is redirected, so just fall through
* to writing unconverted to the logfile in this case.
*
* Since we palloc the structure required for conversion, also fall
* through to writing unconverted if we have not yet set up
* CurrentMemoryContext.
*/
- if (GetDatabaseEncoding() != GetPlatformEncoding() &&
- !in_error_recursion_trouble() &&
+ if (!in_error_recursion_trouble() &&
!redirection_done &&
CurrentMemoryContext != NULL)
{
WCHAR *utf16;
int utf16len;
- utf16 = pgwin32_toUTF16(line, len, &utf16len);
+ utf16 = pgwin32_message_to_UTF16(line, len, &utf16len);
if (utf16 != NULL)
{
HANDLE stdHandle;
SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE);
SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE);
- /* Use the right encoding in translated messages */
-#ifdef ENABLE_NLS
- pg_bind_textdomain_codeset(textdomain(NULL));
-#endif
-
ReleaseSysCache(tup);
}
/* ----------
* These are encoding names for gettext.
+ *
+ * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
* ----------
*/
pg_enc2gettext pg_enc2gettext_tbl[] =
{
+ {PG_SQL_ASCII, "US-ASCII"},
{PG_UTF8, "UTF-8"},
{PG_LATIN1, "LATIN1"},
{PG_LATIN2, "LATIN2"},
{PG_EUC_KR, "EUC-KR"},
{PG_EUC_TW, "EUC-TW"},
{PG_EUC_JIS_2004, "EUC-JP"},
+ {PG_SJIS, "SHIFT-JIS"},
+ {PG_BIG5, "BIG5"},
+ {PG_GBK, "GBK"},
+ {PG_UHC, "UHC"},
+ {PG_GB18030, "GB18030"},
+ {PG_JOHAB, "JOHAB"},
+ {PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
{0, NULL}
};
static FmgrInfo *ToClientConvProc = NULL;
/*
- * These variables track the currently selected FE and BE encodings.
+ * These variables track the currently-selected encodings.
*/
static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
-static pg_enc2name *PlatformEncoding = NULL;
+static pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
/*
* During backend startup we can't set client encoding because we (a)
Assert(DatabaseEncoding->encoding == encoding);
}
-/*
- * Bind gettext to the codeset equivalent with the database encoding.
- */
void
-pg_bind_textdomain_codeset(const char *domainname)
+SetMessageEncoding(int encoding)
{
-#if defined(ENABLE_NLS)
- int encoding = GetDatabaseEncoding();
- int i;
+ /* Some calls happen before we can elog()! */
+ Assert(PG_VALID_ENCODING(encoding));
- /*
- * gettext() uses the codeset specified by LC_CTYPE by default, so if that
- * matches the database encoding we don't need to do anything. In CREATE
- * DATABASE, we enforce or trust that the locale's codeset matches
- * database encoding, except for the C locale. In C locale, we bind
- * gettext() explicitly to the right codeset.
- *
- * On Windows, though, gettext() tends to get confused so we always bind
- * it.
- */
-#ifndef WIN32
- const char *ctype = setlocale(LC_CTYPE, NULL);
+ MessageEncoding = &pg_enc2name_tbl[encoding];
+ Assert(MessageEncoding->encoding == encoding);
+}
- if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0)
- return;
-#endif
+#ifdef ENABLE_NLS
+/*
+ * Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
+ * codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
+ * fail for gettext-internal causes like out-of-memory.
+ *
+ * Returns true if the codeset was bound. Returns false if "encoding" has no
+ * entry in pg_enc2gettext_tbl or if bind_textdomain_codeset() returned NULL.
+ * Only the latter case is reported: through elog(LOG) when
+ * CurrentMemoryContext is set, otherwise through write_stderr().
+ */
+static bool
+raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
+{
+ bool elog_ok = (CurrentMemoryContext != NULL);
+ int i;
for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
{
if (pg_enc2gettext_tbl[i].encoding == encoding)
{
if (bind_textdomain_codeset(domainname,
- pg_enc2gettext_tbl[i].name) == NULL)
+ pg_enc2gettext_tbl[i].name) != NULL)
+ return true;
+
+ if (elog_ok)
elog(LOG, "bind_textdomain_codeset failed");
+ else
+ write_stderr("bind_textdomain_codeset failed");
+
break;
}
}
+
+ return false;
+}
+
+/*
+ * Bind a gettext message domain to the codeset corresponding to the database
+ * encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
+ * Return the MessageEncoding implied by the new settings.
+ *
+ * On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
+ * When that matches the database encoding, we don't need to do anything. In
+ * CREATE DATABASE, we enforce or trust that the locale's codeset matches the
+ * database encoding, except for the C locale. (On Windows, we also permit a
+ * discrepancy under the UTF8 encoding.) For the C locale, explicitly bind
+ * gettext to the right codeset.
+ *
+ * On Windows, gettext defaults to the Windows ANSI code page. This is a
+ * convenient departure for software that passes the strings to Windows ANSI
+ * APIs, but we don't do that. Compel gettext to use database encoding or,
+ * failing that, the LC_CTYPE encoding as it would on other platforms.
+ *
+ * This function is called before elog() and palloc() are usable.
+ *
+ * Note on the code shape: outside Windows, the C/POSIX locale test guards the
+ * explicit-bind attempt that follows it, so that attempt runs only in those
+ * locales. On Windows the test is compiled out and the attempt is always
+ * made. If no explicit bind succeeds, the result comes from
+ * pg_get_encoding_from_locale(), with a negative answer mapped to
+ * PG_SQL_ASCII; Windows additionally binds gettext to that encoding and, if
+ * that fails, reports the message encoding already in effect.
+ */
+int
+pg_bind_textdomain_codeset(const char *domainname)
+{
+ bool elog_ok = (CurrentMemoryContext != NULL);
+ int encoding = GetDatabaseEncoding();
+ int new_msgenc;
+
+#ifndef WIN32
+ const char *ctype = setlocale(LC_CTYPE, NULL);
+
+ if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
#endif
+ if (encoding != PG_SQL_ASCII &&
+ raw_pg_bind_textdomain_codeset(domainname, encoding))
+ return encoding;
+
+ new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
+ if (new_msgenc < 0)
+ new_msgenc = PG_SQL_ASCII;
+
+#ifdef WIN32
+ if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
+ /* On failure, the old message encoding remains valid. */
+ return GetMessageEncoding();
+#endif
+
+ return new_msgenc;
}
+#endif
+/*
+ * The database encoding, also called the server encoding, represents the
+ * encoding of data stored in text-like data types. Affected types include
+ * cstring, text, varchar, name, xml, and json.
+ */
int
GetDatabaseEncoding(void)
{
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
}
+/*
+ * gettext() returns messages in this encoding. This often matches the
+ * database encoding, but it differs for SQL_ASCII databases, for processes
+ * not attached to a database, and under a database encoding lacking iconv
+ * support (MULE_INTERNAL).
+ *
+ * The value returned is whatever SetMessageEncoding() last stored; until the
+ * first such call it is SQL_ASCII, the initial setting of MessageEncoding.
+ */
int
-GetPlatformEncoding(void)
+GetMessageEncoding(void)
{
- if (PlatformEncoding == NULL)
- {
- /* try to determine encoding of server's environment locale */
- int encoding = pg_get_encoding_from_locale("", true);
-
- if (encoding < 0)
- encoding = PG_SQL_ASCII;
- PlatformEncoding = &pg_enc2name_tbl[encoding];
- }
- return PlatformEncoding->encoding;
+ Assert(MessageEncoding);
+ return MessageEncoding->encoding;
}
#ifdef WIN32
* is also passed to utf16len if not null. Returns NULL iff failed.
*/
WCHAR *
-pgwin32_toUTF16(const char *str, int len, int *utf16len)
+pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
{
WCHAR *utf16;
int dstlen;
UINT codepage;
- codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage;
+ codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
/*
* Use MultiByteToWideChar directly if there is a corresponding codepage,
char *utf8;
utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
- len, GetDatabaseEncoding(), PG_UTF8);
+ len, GetMessageEncoding(), PG_UTF8);
if (utf8 != str)
len = strlen(utf8);
extern void SetDatabaseEncoding(int encoding);
extern int GetDatabaseEncoding(void);
extern const char *GetDatabaseEncodingName(void);
-extern int GetPlatformEncoding(void);
-extern void pg_bind_textdomain_codeset(const char *domainname);
+extern void SetMessageEncoding(int encoding);
+extern int GetMessageEncoding(void);
+
+#ifdef ENABLE_NLS
+extern int pg_bind_textdomain_codeset(const char *domainname);
+#endif
extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name);
extern bool pg_utf8_islegal(const unsigned char *source, int length);
#ifdef WIN32
-extern WCHAR *pgwin32_toUTF16(const char *str, int len, int *utf16len);
+extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
#endif
#endif /* PG_WCHAR_H */
/* port/chklocale.c */
extern int pg_get_encoding_from_locale(const char *ctype, bool write_message);
+#if defined(WIN32) && !defined(FRONTEND)
+extern int pg_codepage_to_encoding(UINT cp);
+#endif
+
/* port/inet_net_ntop.c */
extern char *inet_net_ntop(int af, const void *src, int bits,
char *dst, size_t size);
return r;
}
+
+#ifndef FRONTEND
+/*
+ * Given a Windows code page identifier, find the corresponding PostgreSQL
+ * encoding. Issue a warning and return -1 if none found.
+ *
+ * The identifier is rendered as "CP<number>" and compared, ignoring case,
+ * against the system_enc_name entries of encoding_match_list; the
+ * pg_enc_code of the first match is returned.
+ */
+int
+pg_codepage_to_encoding(UINT cp)
+{
+	char		sys[16];
+	int			i;
+
+	/* Bounded write, so the name can never run past the end of sys[]. */
+	snprintf(sys, sizeof(sys), "CP%u", cp);
+
+	/* Check the table */
+	for (i = 0; encoding_match_list[i].system_enc_name; i++)
+	{
+		if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
+			return encoding_match_list[i].pg_enc_code;
+	}
+
+	ereport(WARNING,
+			(errmsg("could not determine encoding for codeset \"%s\"", sys),
+			 errdetail("Please report this to <pgsql-bugs@postgresql.org>.")));
+
+	return -1;
+}
+#endif
#endif /* WIN32 */
#if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
*
* If the result is PG_SQL_ASCII, callers should treat it as being compatible
* with any desired encoding.
+ *
+ * If running in the backend and write_message is false, this function must
+ * cope with the possibility that elog() and palloc() are not yet usable.
*/
int
pg_get_encoding_from_locale(const char *ctype, bool write_message)