* upper/lower/initcap functions
*****************************************************************************/
-#ifdef USE_ICU
-
-typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
-
-static int32_t
-icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
- UChar **buff_dest, UChar *buff_source, int32_t len_source)
-{
- UErrorCode status;
- int32_t len_dest;
-
- len_dest = len_source; /* try first with same length */
- *buff_dest = palloc(len_dest * sizeof(**buff_dest));
- status = U_ZERO_ERROR;
- len_dest = func(*buff_dest, len_dest, buff_source, len_source,
- mylocale->info.icu.locale, &status);
- if (status == U_BUFFER_OVERFLOW_ERROR)
- {
- /* try again with adjusted length */
- pfree(*buff_dest);
- *buff_dest = palloc(len_dest * sizeof(**buff_dest));
- status = U_ZERO_ERROR;
- len_dest = func(*buff_dest, len_dest, buff_source, len_source,
- mylocale->info.icu.locale, &status);
- }
- if (U_FAILURE(status))
- ereport(ERROR,
- (errmsg("case conversion failed: %s", u_errorName(status))));
- return len_dest;
-}
-
-static int32_t
-u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode)
-{
- return u_strToTitle(dest, destCapacity, src, srcLength,
- NULL, locale, pErrorCode);
-}
-
-#endif /* USE_ICU */
-
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
}
else
{
-#ifdef USE_ICU
- if (mylocale->provider == COLLPROVIDER_ICU)
- {
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
-
- len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
- len_conv = icu_convert_case(u_strToLower, mylocale,
- &buff_conv, buff_uchar, len_uchar);
- icu_from_uchar(&result, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
- }
- else
-#endif
- if (mylocale->provider == COLLPROVIDER_BUILTIN)
+ const char *src = buff;
+ size_t srclen = nbytes;
+ size_t dstsize;
+ char *dst;
+ size_t needed;
+
+ /* first try buffer of equal size plus terminating NUL */
+ dstsize = srclen + 1;
+ dst = palloc(dstsize);
+
+ needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
+ if (needed + 1 > dstsize)
{
- const char *src = buff;
- size_t srclen = nbytes;
- size_t dstsize;
- char *dst;
- size_t needed;
-
- Assert(GetDatabaseEncoding() == PG_UTF8);
-
- /* first try buffer of equal size plus terminating NUL */
- dstsize = srclen + 1;
- dst = palloc(dstsize);
-
- needed = unicode_strlower(dst, dstsize, src, srclen);
- if (needed + 1 > dstsize)
- {
- /* grow buffer if needed and retry */
- dstsize = needed + 1;
- dst = repalloc(dst, dstsize);
- needed = unicode_strlower(dst, dstsize, src, srclen);
- Assert(needed + 1 == dstsize);
- }
-
- Assert(dst[needed] == '\0');
- result = dst;
+ /* grow buffer if needed and retry */
+ dstsize = needed + 1;
+ dst = repalloc(dst, dstsize);
+ needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
+ Assert(needed + 1 <= dstsize);
}
- else
- {
- Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
- if (pg_database_encoding_max_length() > 1)
- {
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
-
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
-
- /*
- * Make result large enough; case change might change number
- * of bytes
- */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
- else
- {
- char *p;
-
- result = pnstrdup(buff, nbytes);
-
- /*
- * Note: we assume that tolower_l() will not be so broken as
- * to need an isupper_l() guard test. When using the default
- * collation, we apply the traditional Postgres behavior that
- * forces ASCII-style treatment of I/i, but in non-default
- * collations you get exactly what the collation says.
- */
- for (p = result; *p; p++)
- {
- if (mylocale->is_default)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- }
- }
- }
+ Assert(dst[needed] == '\0');
+ result = dst;
}
return result;
}
else
{
-#ifdef USE_ICU
- if (mylocale->provider == COLLPROVIDER_ICU)
+ const char *src = buff;
+ size_t srclen = nbytes;
+ size_t dstsize;
+ char *dst;
+ size_t needed;
+
+ /* first try buffer of equal size plus terminating NUL */
+ dstsize = srclen + 1;
+ dst = palloc(dstsize);
+
+ needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
+ if (needed + 1 > dstsize)
{
- int32_t len_uchar,
- len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
-
- len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
- len_conv = icu_convert_case(u_strToUpper, mylocale,
- &buff_conv, buff_uchar, len_uchar);
- icu_from_uchar(&result, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
+ /* grow buffer if needed and retry */
+ dstsize = needed + 1;
+ dst = repalloc(dst, dstsize);
+ needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
+ Assert(needed + 1 <= dstsize);
}
- else
-#endif
- if (mylocale->provider == COLLPROVIDER_BUILTIN)
- {
- const char *src = buff;
- size_t srclen = nbytes;
- size_t dstsize;
- char *dst;
- size_t needed;
-
- Assert(GetDatabaseEncoding() == PG_UTF8);
-
- /* first try buffer of equal size plus terminating NUL */
- dstsize = srclen + 1;
- dst = palloc(dstsize);
-
- needed = unicode_strupper(dst, dstsize, src, srclen);
- if (needed + 1 > dstsize)
- {
- /* grow buffer if needed and retry */
- dstsize = needed + 1;
- dst = repalloc(dst, dstsize);
- needed = unicode_strupper(dst, dstsize, src, srclen);
- Assert(needed + 1 == dstsize);
- }
-
- Assert(dst[needed] == '\0');
- result = dst;
- }
- else
- {
- Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
- if (pg_database_encoding_max_length() > 1)
- {
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
-
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-
- /*
- * Make result large enough; case change might change number
- * of bytes
- */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
- else
- {
- char *p;
-
- result = pnstrdup(buff, nbytes);
-
- /*
- * Note: we assume that toupper_l() will not be so broken as
- * to need an islower_l() guard test. When using the default
- * collation, we apply the traditional Postgres behavior that
- * forces ASCII-style treatment of I/i, but in non-default
- * collations you get exactly what the collation says.
- */
- for (p = result; *p; p++)
- {
- if (mylocale->is_default)
- *p = pg_toupper((unsigned char) *p);
- else
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- }
- }
- }
+ Assert(dst[needed] == '\0');
+ result = dst;
}
return result;
}
-struct WordBoundaryState
-{
- const char *str;
- size_t len;
- size_t offset;
- bool init;
- bool prev_alnum;
-};
-
-/*
- * Simple word boundary iterator that draws boundaries each time the result of
- * pg_u_isalnum() changes.
- */
-static size_t
-initcap_wbnext(void *state)
-{
- struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
-
- while (wbstate->offset < wbstate->len &&
- wbstate->str[wbstate->offset] != '\0')
- {
- pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
- wbstate->offset);
- bool curr_alnum = pg_u_isalnum(u, true);
-
- if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
- {
- size_t prev_offset = wbstate->offset;
-
- wbstate->init = true;
- wbstate->offset += unicode_utf8len(u);
- wbstate->prev_alnum = curr_alnum;
- return prev_offset;
- }
-
- wbstate->offset += unicode_utf8len(u);
- }
-
- return wbstate->len;
-}
-
/*
* collation-aware, wide-character-aware initcap function
*
str_initcap(const char *buff, size_t nbytes, Oid collid)
{
char *result;
- int wasalnum = false;
pg_locale_t mylocale;
if (!buff)
}
else
{
-#ifdef USE_ICU
- if (mylocale->provider == COLLPROVIDER_ICU)
+ const char *src = buff;
+ size_t srclen = nbytes;
+ size_t dstsize;
+ char *dst;
+ size_t needed;
+
+ /* first try buffer of equal size plus terminating NUL */
+ dstsize = srclen + 1;
+ dst = palloc(dstsize);
+
+ needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
+ if (needed + 1 > dstsize)
{
- int32_t len_uchar,
- len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
-
- len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
- len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
- &buff_conv, buff_uchar, len_uchar);
- icu_from_uchar(&result, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
+ /* grow buffer if needed and retry */
+ dstsize = needed + 1;
+ dst = repalloc(dst, dstsize);
+ needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
+ Assert(needed + 1 <= dstsize);
}
- else
-#endif
- if (mylocale->provider == COLLPROVIDER_BUILTIN)
- {
- const char *src = buff;
- size_t srclen = nbytes;
- size_t dstsize;
- char *dst;
- size_t needed;
- struct WordBoundaryState wbstate = {
- .str = src,
- .len = srclen,
- .offset = 0,
- .init = false,
- .prev_alnum = false,
- };
-
- Assert(GetDatabaseEncoding() == PG_UTF8);
-
- /* first try buffer of equal size plus terminating NUL */
- dstsize = srclen + 1;
- dst = palloc(dstsize);
-
- needed = unicode_strtitle(dst, dstsize, src, srclen,
- initcap_wbnext, &wbstate);
- if (needed + 1 > dstsize)
- {
- /* reset iterator */
- wbstate.offset = 0;
- wbstate.init = false;
-
- /* grow buffer if needed and retry */
- dstsize = needed + 1;
- dst = repalloc(dst, dstsize);
- needed = unicode_strtitle(dst, dstsize, src, srclen,
- initcap_wbnext, &wbstate);
- Assert(needed + 1 == dstsize);
- }
- result = dst;
- }
- else
- {
- Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
- if (pg_database_encoding_max_length() > 1)
- {
- wchar_t *workspace;
- size_t curr_char;
- size_t result_size;
-
- /* Overflow paranoia */
- if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
- for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
- if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
- }
-
- /*
- * Make result large enough; case change might change number
- * of bytes
- */
- result_size = curr_char * pg_database_encoding_max_length() + 1;
- result = palloc(result_size);
-
- wchar2char(result, workspace, result_size, mylocale);
- pfree(workspace);
- }
- else
- {
- char *p;
-
- result = pnstrdup(buff, nbytes);
-
- /*
- * Note: we assume that toupper_l()/tolower_l() will not be so
- * broken as to need guard tests. When using the default
- * collation, we apply the traditional Postgres behavior that
- * forces ASCII-style treatment of I/i, but in non-default
- * collations you get exactly what the collation says.
- */
- for (p = result; *p; p++)
- {
- if (mylocale->is_default)
- {
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = pg_toupper((unsigned char) *p);
- }
- else
- {
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- }
- wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
- }
- }
- }
+ Assert(dst[needed] == '\0');
+ result = dst;
}
return result;
const char *src, ssize_t srclen,
pg_locale_t locale);
+extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
+extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
+extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
/* GUC settings */
char *locale_messages;
char *locale_monetary;
return collversion;
}
+/*
+ * pg_strlower
+ *
+ * Lowercase src into dst by delegating to the implementation that matches
+ * the locale's provider, and return whatever that implementation returns.
+ * Callers in this patch treat the result as the number of bytes needed,
+ * not counting the terminating NUL, and retry with a larger buffer when
+ * result + 1 exceeds dstsize.
+ */
+size_t
+pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	switch (locale->provider)
+	{
+		case COLLPROVIDER_BUILTIN:
+			return strlower_builtin(dst, dstsize, src, srclen, locale);
+#ifdef USE_ICU
+		case COLLPROVIDER_ICU:
+			return strlower_icu(dst, dstsize, src, srclen, locale);
+#endif
+		case COLLPROVIDER_LIBC:
+			return strlower_libc(dst, dstsize, src, srclen, locale);
+		default:
+			/* shouldn't happen */
+			PGLOCALE_SUPPORT_ERROR(locale->provider);
+			break;
+	}
+
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * pg_strtitle
+ *
+ * Titlecase src into dst by delegating to the implementation that matches
+ * the locale's provider, and return whatever that implementation returns.
+ * Callers in this patch treat the result as the number of bytes needed,
+ * not counting the terminating NUL, and retry with a larger buffer when
+ * result + 1 exceeds dstsize.
+ */
+size_t
+pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	switch (locale->provider)
+	{
+		case COLLPROVIDER_BUILTIN:
+			return strtitle_builtin(dst, dstsize, src, srclen, locale);
+#ifdef USE_ICU
+		case COLLPROVIDER_ICU:
+			return strtitle_icu(dst, dstsize, src, srclen, locale);
+#endif
+		case COLLPROVIDER_LIBC:
+			return strtitle_libc(dst, dstsize, src, srclen, locale);
+		default:
+			/* shouldn't happen */
+			PGLOCALE_SUPPORT_ERROR(locale->provider);
+			break;
+	}
+
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * pg_strupper
+ *
+ * Uppercase src into dst by delegating to the implementation that matches
+ * the locale's provider, and return whatever that implementation returns.
+ * Callers in this patch treat the result as the number of bytes needed,
+ * not counting the terminating NUL, and retry with a larger buffer when
+ * result + 1 exceeds dstsize.
+ */
+size_t
+pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	switch (locale->provider)
+	{
+		case COLLPROVIDER_BUILTIN:
+			return strupper_builtin(dst, dstsize, src, srclen, locale);
+#ifdef USE_ICU
+		case COLLPROVIDER_ICU:
+			return strupper_icu(dst, dstsize, src, srclen, locale);
+#endif
+		case COLLPROVIDER_LIBC:
+			return strupper_libc(dst, dstsize, src, srclen, locale);
+		default:
+			/* shouldn't happen */
+			PGLOCALE_SUPPORT_ERROR(locale->provider);
+			break;
+	}
+
+	return 0;					/* keep compiler quiet */
+}
+
/*
* pg_strcoll
*
#include "catalog/pg_database.h"
#include "catalog/pg_collation.h"
+#include "common/unicode_case.h"
+#include "common/unicode_category.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
+extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
+
+/*
+ * Iteration state consumed by initcap_wbnext().
+ */
+struct WordBoundaryState
+{
+ const char *str; /* string being scanned */
+ size_t len; /* scan limit in bytes; also returned once scanning ends */
+ size_t offset; /* byte offset of the next character to examine */
+ bool init; /* false until the first boundary has been reported */
+ bool prev_alnum; /* pg_u_isalnum() result at the last reported boundary */
+};
+
+/*
+ * Word boundary callback: report the byte offset of the next character at
+ * which the pg_u_isalnum() classification flips (the very first character
+ * examined always counts as a boundary).  Once the end of the string or a
+ * NUL byte is reached, the stored length is returned instead.
+ */
+static size_t
+initcap_wbnext(void *state)
+{
+	struct WordBoundaryState *wb = (struct WordBoundaryState *) state;
+
+	for (;;)
+	{
+		size_t		start = wb->offset;
+		pg_wchar	cp;
+		bool		alnum;
+		bool		at_boundary;
+
+		/* stop at the length limit or at an embedded terminator */
+		if (start >= wb->len || wb->str[start] == '\0')
+			break;
+
+		cp = utf8_to_unicode((unsigned char *) wb->str + start);
+		alnum = pg_u_isalnum(cp, true);
+		at_boundary = (!wb->init || alnum != wb->prev_alnum);
+
+		/* always step past the character just examined */
+		wb->offset = start + unicode_utf8len(cp);
+
+		if (at_boundary)
+		{
+			wb->init = true;
+			wb->prev_alnum = alnum;
+			return start;
+		}
+	}
+
+	return wb->len;
+}
+
+/*
+ * Lowercase conversion for the builtin provider: forwards to
+ * unicode_strlower() and returns its result.  The locale argument is not
+ * consulted here.
+ */
+size_t
+strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ return unicode_strlower(dest, destsize, src, srclen);
+}
+
+/*
+ * Titlecase conversion for the builtin provider.  Word boundaries are
+ * supplied to unicode_strtitle() by initcap_wbnext(), driven by a fresh
+ * iterator state positioned at the start of src.  The locale argument is
+ * not consulted here.
+ */
+size_t
+strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	struct WordBoundaryState wbstate;
+
+	/* begin scanning at the first byte, with no boundary reported yet */
+	wbstate.str = src;
+	wbstate.len = srclen;
+	wbstate.offset = 0;
+	wbstate.init = false;
+	wbstate.prev_alnum = false;
+
+	return unicode_strtitle(dest, destsize, src, srclen,
+							initcap_wbnext, &wbstate);
+}
+
+/*
+ * Uppercase conversion for the builtin provider: forwards to
+ * unicode_strupper() and returns its result.  The locale argument is not
+ * consulted here.
+ */
+size_t
+strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ return unicode_strupper(dest, destsize, src, srclen);
+}
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
+extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
const char *src, ssize_t srclen,
pg_locale_t locale);
+typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
/*
* Converter object for converting between ICU's UChar strings and C strings
* in database encoding. Since the database encoding doesn't change, we only
static int32_t uchar_convert(UConverter *converter,
UChar *dest, int32_t destlen,
const char *src, int32_t srclen);
+static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
+ size_t nbytes);
+static size_t icu_from_uchar(char *dest, size_t destsize,
+ const UChar *buff_uchar, int32_t len_uchar);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
UErrorCode *status);
+static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
+ UChar **buff_dest, UChar *buff_source,
+ int32_t len_source);
+static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
#endif
pg_locale_t
}
}
+/*
+ * Lowercase conversion for the ICU provider.
+ *
+ * The input is converted to a UChar string, case-mapped with
+ * u_strToLower() via icu_convert_case(), and converted back into dest by
+ * icu_from_uchar(), whose return value is passed through to the caller.
+ * Both intermediate UChar buffers are released before returning.
+ */
+size_t
+strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	UChar	   *uchar_in;
+	UChar	   *uchar_out;
+	int32_t		uchar_in_len = icu_to_uchar(&uchar_in, src, srclen);
+	int32_t		uchar_out_len = icu_convert_case(u_strToLower, locale,
+												 &uchar_out, uchar_in,
+												 uchar_in_len);
+	size_t		needed = icu_from_uchar(dest, destsize,
+										uchar_out, uchar_out_len);
+
+	pfree(uchar_in);
+	pfree(uchar_out);
+
+	return needed;
+}
+
+/*
+ * Titlecase conversion for the ICU provider.
+ *
+ * The input is converted to a UChar string, case-mapped with
+ * u_strToTitle_default_BI() via icu_convert_case(), and converted back
+ * into dest by icu_from_uchar(), whose return value is passed through to
+ * the caller.  Both intermediate UChar buffers are released before
+ * returning.
+ */
+size_t
+strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	UChar	   *uchar_in;
+	UChar	   *uchar_out;
+	int32_t		uchar_in_len = icu_to_uchar(&uchar_in, src, srclen);
+	int32_t		uchar_out_len = icu_convert_case(u_strToTitle_default_BI,
+												 locale,
+												 &uchar_out, uchar_in,
+												 uchar_in_len);
+	size_t		needed = icu_from_uchar(dest, destsize,
+										uchar_out, uchar_out_len);
+
+	pfree(uchar_in);
+	pfree(uchar_out);
+
+	return needed;
+}
+
+/*
+ * Uppercase conversion for the ICU provider.
+ *
+ * The input is converted to a UChar string, case-mapped with
+ * u_strToUpper() via icu_convert_case(), and converted back into dest by
+ * icu_from_uchar(), whose return value is passed through to the caller.
+ * Both intermediate UChar buffers are released before returning.
+ */
+size_t
+strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	UChar	   *uchar_in;
+	UChar	   *uchar_out;
+	int32_t		uchar_in_len = icu_to_uchar(&uchar_in, src, srclen);
+	int32_t		uchar_out_len = icu_convert_case(u_strToUpper, locale,
+												 &uchar_out, uchar_in,
+												 uchar_in_len);
+	size_t		needed = icu_from_uchar(dest, destsize,
+										uchar_out, uchar_out_len);
+
+	pfree(uchar_in);
+	pfree(uchar_out);
+
+	return needed;
+}
+
/*
* strncoll_icu
*
* The result string is nul-terminated, though most callers rely on the
* result length instead.
*/
-int32_t
+static int32_t
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
{
int32_t len_uchar;
*
* The result string is nul-terminated.
*/
-int32_t
-icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+static size_t
+icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
{
UErrorCode status;
int32_t len_result;
(errmsg("%s failed: %s", "ucnv_fromUChars",
u_errorName(status))));
- *result = palloc(len_result + 1);
+ if (len_result + 1 > destsize)
+ return len_result;
status = U_ZERO_ERROR;
- len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+ len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
buff_uchar, len_uchar, &status);
if (U_FAILURE(status) ||
status == U_STRING_NOT_TERMINATED_WARNING)
return len_result;
}
+/*
+ * Apply an ICU case-mapping function to a UChar string.
+ *
+ * *buff_dest receives a palloc'd output buffer that the caller must
+ * pfree.  The first attempt uses a buffer as long as the input; if ICU
+ * reports U_BUFFER_OVERFLOW_ERROR, the buffer is reallocated at the length
+ * ICU returned and the conversion is run once more.  Any remaining ICU
+ * failure is raised as an ERROR.  Returns the length reported by func.
+ */
+static int32_t
+icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
+				 UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+	const char *icu_locale = mylocale->info.icu.locale;
+	UErrorCode	status = U_ZERO_ERROR;
+	int32_t		capacity = len_source;	/* try first with same length */
+	int32_t		len_dest;
+
+	*buff_dest = palloc(capacity * sizeof(**buff_dest));
+	len_dest = func(*buff_dest, capacity, buff_source, len_source,
+					icu_locale, &status);
+
+	if (status == U_BUFFER_OVERFLOW_ERROR)
+	{
+		/* try again with the length ICU asked for */
+		capacity = len_dest;
+		pfree(*buff_dest);
+		*buff_dest = palloc(capacity * sizeof(**buff_dest));
+		status = U_ZERO_ERROR;
+		len_dest = func(*buff_dest, capacity, buff_source, len_source,
+						icu_locale, &status);
+	}
+
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("case conversion failed: %s", u_errorName(status))));
+
+	return len_dest;
+}
+
+/*
+ * Adapter giving u_strToTitle() the ICU_Convert_Func signature so it can
+ * be handed to icu_convert_case().  It supplies NULL for the extra
+ * (break iterator) argument; per the ICU documentation that selects ICU's
+ * standard titlecase break iterator.
+ */
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode)
+{
+ return u_strToTitle(dest, destCapacity, src, srcLength,
+ NULL, locale, pErrorCode);
+}
+
/*
* strncoll_icu_no_utf8
*
#include "postgres.h"
+#include <limits.h>
+#include <wctype.h>
+
#include "access/htup_details.h"
#include "catalog/pg_database.h"
#include "catalog/pg_collation.h"
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
+extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+
extern int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
pg_locale_t locale);
#endif
+static size_t strlower_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strlower_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strtitle_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strtitle_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strupper_libc_sb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+static size_t strupper_libc_mb(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+
+/*
+ * Lowercase conversion for the libc provider.  Picks the wide-character
+ * implementation when the database encoding can use more than one byte per
+ * character, and the byte-at-a-time implementation otherwise.
+ */
+size_t
+strlower_libc(char *dst, size_t dstsize, const char *src,
+			  ssize_t srclen, pg_locale_t locale)
+{
+	bool		multibyte = (pg_database_encoding_max_length() > 1);
+
+	return multibyte
+		? strlower_libc_mb(dst, dstsize, src, srclen, locale)
+		: strlower_libc_sb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * Titlecase conversion for the libc provider.  Picks the wide-character
+ * implementation when the database encoding can use more than one byte per
+ * character, and the byte-at-a-time implementation otherwise.
+ */
+size_t
+strtitle_libc(char *dst, size_t dstsize, const char *src,
+			  ssize_t srclen, pg_locale_t locale)
+{
+	bool		multibyte = (pg_database_encoding_max_length() > 1);
+
+	return multibyte
+		? strtitle_libc_mb(dst, dstsize, src, srclen, locale)
+		: strtitle_libc_sb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * Uppercase conversion for the libc provider.  Picks the wide-character
+ * implementation when the database encoding can use more than one byte per
+ * character, and the byte-at-a-time implementation otherwise.
+ */
+size_t
+strupper_libc(char *dst, size_t dstsize, const char *src,
+			  ssize_t srclen, pg_locale_t locale)
+{
+	bool		multibyte = (pg_database_encoding_max_length() > 1);
+
+	return multibyte
+		? strupper_libc_mb(dst, dstsize, src, srclen, locale)
+		: strupper_libc_sb(dst, dstsize, src, srclen, locale);
+}
+
+/*
+ * Lowercase conversion for the libc provider in single-byte encodings.
+ *
+ * A negative srclen means src is NUL-terminated.  The result always has
+ * the same byte length as the input, so srclen is returned whether or not
+ * the conversion was performed; dest is written only when it has room for
+ * srclen bytes plus the terminating NUL.
+ */
+static size_t
+strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	if (srclen + 1 <= destsize)
+	{
+		locale_t	loc = locale->info.lt;
+		char	   *p;
+
+		memcpy(dest, src, srclen);
+		dest[srclen] = '\0';
+
+		/*
+		 * Note: we assume that tolower_l() will not be so broken as to need
+		 * an isupper_l() guard test.  When using the default collation, we
+		 * apply the traditional Postgres behavior that forces ASCII-style
+		 * treatment of I/i, but in non-default collations you get exactly
+		 * what the collation says.
+		 */
+		for (p = dest; *p; p++)
+		{
+			if (locale->is_default)
+				*p = pg_tolower((unsigned char) *p);
+			else
+				*p = tolower_l((unsigned char) *p, loc);
+		}
+	}
+
+	return srclen;
+}
+
+/*
+ * Lowercase conversion for the libc provider in multibyte encodings.
+ *
+ * The input is converted to wchar_t, mapped with towlower_l(), and
+ * converted back into a scratch buffer.  A negative srclen means src is
+ * NUL-terminated.  Returns the byte length of the converted string, not
+ * counting the terminating NUL; dest is written only when it has room for
+ * that many bytes plus the NUL, so the caller can retry with a larger
+ * buffer.  Both scratch buffers are released on every path.
+ */
+static size_t
+strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+
+	/*
+	 * Make result large enough; case change might change number of bytes
+	 */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	pfree(workspace);
+
+	/*
+	 * Copy out only when the caller's buffer is big enough.  The scratch
+	 * buffer is freed either way, so the caller's grow-and-retry pass does
+	 * not leak it.
+	 */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(result);
+
+	return result_size;
+}
+
+/*
+ * Titlecase conversion for the libc provider in single-byte encodings.
+ *
+ * A negative srclen means src is NUL-terminated.  The result always has
+ * the same byte length as the input, so srclen is returned whether or not
+ * the conversion was performed; dest is written only when it has room for
+ * srclen bytes plus the terminating NUL.
+ */
+static size_t
+strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc;
+	int			prev_alnum = false;
+	size_t		i;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* not enough room: just report the size we would need */
+	if (srclen + 1 > destsize)
+		return srclen;
+
+	loc = locale->info.lt;
+
+	memcpy(dest, src, srclen);
+	dest[srclen] = '\0';
+
+	/*
+	 * Note: we assume that toupper_l()/tolower_l() will not be so broken as
+	 * to need guard tests.  When using the default collation, we apply the
+	 * traditional Postgres behavior that forces ASCII-style treatment of
+	 * I/i, but in non-default collations you get exactly what the collation
+	 * says.
+	 */
+	for (i = 0; dest[i] != '\0'; i++)
+	{
+		unsigned char ch = (unsigned char) dest[i];
+
+		/* uppercase at the start of a word, lowercase inside one */
+		if (locale->is_default)
+			dest[i] = prev_alnum ? pg_tolower(ch) : pg_toupper(ch);
+		else
+			dest[i] = prev_alnum ? tolower_l(ch, loc) : toupper_l(ch, loc);
+
+		prev_alnum = isalnum_l((unsigned char) dest[i], loc);
+	}
+
+	return srclen;
+}
+
+/*
+ * Titlecase conversion for the libc provider in multibyte encodings.
+ *
+ * The input is converted to wchar_t; each character is uppercased when the
+ * preceding (already converted) character was not alphanumeric and
+ * lowercased otherwise, then the result is converted back into a scratch
+ * buffer.  A negative srclen means src is NUL-terminated.  Returns the
+ * byte length of the converted string, not counting the terminating NUL;
+ * dest is written only when it has room for that many bytes plus the NUL,
+ * so the caller can retry with a larger buffer.  Both scratch buffers are
+ * released on every path.
+ */
+static size_t
+strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	int			wasalnum = false;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+	{
+		if (wasalnum)
+			workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+		else
+			workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+		wasalnum = iswalnum_l(workspace[curr_char], loc);
+	}
+
+	/*
+	 * Make result large enough; case change might change number of bytes
+	 */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	pfree(workspace);
+
+	/*
+	 * Copy out only when the caller's buffer is big enough.  The scratch
+	 * buffer is freed either way, so the caller's grow-and-retry pass does
+	 * not leak it.
+	 */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(result);
+
+	return result_size;
+}
+
+/*
+ * Uppercase conversion for the libc provider in single-byte encodings.
+ *
+ * A negative srclen means src is NUL-terminated.  The result always has
+ * the same byte length as the input, so srclen is returned whether or not
+ * the conversion was performed; dest is written only when it has room for
+ * srclen bytes plus the terminating NUL.
+ */
+static size_t
+strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc;
+	size_t		i;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* not enough room: just report the size we would need */
+	if (srclen + 1 > destsize)
+		return srclen;
+
+	loc = locale->info.lt;
+
+	memcpy(dest, src, srclen);
+	dest[srclen] = '\0';
+
+	/*
+	 * Note: we assume that toupper_l() will not be so broken as to need an
+	 * islower_l() guard test.  When using the default collation, we apply
+	 * the traditional Postgres behavior that forces ASCII-style treatment of
+	 * I/i, but in non-default collations you get exactly what the collation
+	 * says.
+	 */
+	for (i = 0; dest[i] != '\0'; i++)
+	{
+		unsigned char ch = (unsigned char) dest[i];
+
+		if (locale->is_default)
+			dest[i] = pg_toupper(ch);
+		else
+			dest[i] = toupper_l(ch, loc);
+	}
+
+	return srclen;
+}
+
+/*
+ * Uppercase conversion for the libc provider in multibyte encodings.
+ *
+ * The input is converted to wchar_t, mapped with towupper_l(), and
+ * converted back into a scratch buffer.  A negative srclen means src is
+ * NUL-terminated.  Returns the byte length of the converted string, not
+ * counting the terminating NUL; dest is written only when it has room for
+ * that many bytes plus the NUL, so the caller can retry with a larger
+ * buffer.  Both scratch buffers are released on every path.
+ */
+static size_t
+strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+
+	/*
+	 * Make result large enough; case change might change number of bytes
+	 */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	pfree(workspace);
+
+	/*
+	 * Copy out only when the caller's buffer is big enough.  The scratch
+	 * buffer is freed either way, so the caller's grow-and-retry pass does
+	 * not leak it.
+	 */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(result);
+
+	return result_size;
+}
+
pg_locale_t
create_pg_locale_libc(Oid collid, MemoryContext context)
{
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+extern size_t pg_strlower(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+extern size_t pg_strtitle(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+extern size_t pg_strupper(char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
extern int pg_strncoll(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2, pg_locale_t locale);
extern void icu_validate_locale(const char *loc_str);
extern char *icu_language_tag(const char *loc_str, int elevel);
-#ifdef USE_ICU
-extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
-#endif
-
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
pg_locale_t locale);