Fix up handling of nondeterministic collations with pattern_ops opclasses.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 21 Sep 2019 20:29:17 +0000 (16:29 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 21 Sep 2019 20:29:17 +0000 (16:29 -0400)
text_pattern_ops and its siblings can't be used with nondeterministic
collations, because they use the text_eq operator which will not behave
as bitwise equality if applied with a nondeterministic collation.  The
initial implementation of that restriction was to insert a run-time test
in the related comparison functions, but that is inefficient, may throw
misleading errors, and will throw errors in some cases that would work.
It seems sufficient to just prevent the combination during CREATE INDEX,
so do that instead.

Lacking any better way to identify the opclasses involved, we need to
hard-wire tests for them, which requires hand-assigned values for their
OIDs, which forces a catversion bump because they previously had OIDs
that would be assigned automatically.  That's slightly annoying in the
v12 branch, but fortunately we're not at rc1 yet, so just do it.

Back-patch to v12 where nondeterministic collations were added.

In passing, run make reformat-dat-files, which found some unrelated
whitespace issues (slightly different ones in HEAD and v12).

Peter Eisentraut, with small corrections by me

Discussion: https://postgr.es/m/22566.1568675619@sss.pgh.pa.us

src/backend/catalog/index.c
src/backend/utils/adt/varchar.c
src/backend/utils/adt/varlena.c
src/include/catalog/catversion.h
src/include/catalog/pg_opclass.dat
src/include/catalog/pg_operator.dat

index 54288a498c1057606d6d56aed10bad5e6940fecb..098732cc4a844f4260c9cb1d7428ed13e68a1984 100644 (file)
@@ -762,6 +762,51 @@ index_create(Relation heapRelation,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("user-defined indexes on system catalog tables are not supported")));
 
+   /*
+    * Btree text_pattern_ops uses text_eq as the equality operator, which is
+    * fine as long as the collation is deterministic; text_eq then reduces to
+    * bitwise equality and so it is semantically compatible with the other
+    * operators and functions in that opclass.  But with a nondeterministic
+    * collation, text_eq could yield results that are incompatible with the
+    * actual behavior of the index (which is determined by the opclass's
+    * comparison function).  We prevent such problems by refusing creation of
+    * an index with that opclass and a nondeterministic collation.
+    *
+    * The same applies to varchar_pattern_ops and bpchar_pattern_ops.  If we
+    * find more cases, we might decide to create a real mechanism for marking
+    * opclasses as incompatible with nondeterminism; but for now, this small
+    * hack suffices.
+    *
+    * Another solution is to use a special operator, not text_eq, as the
+    * equality opclass member; but that is undesirable because it would
+    * prevent index usage in many queries that work fine today.
+    */
+   for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
+   {
+       Oid         collation = collationObjectId[i];
+       Oid         opclass = classObjectId[i];
+
+       if (collation)
+       {
+           if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
+                opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
+                opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
+               !get_collation_isdeterministic(collation))
+           {
+               HeapTuple   classtup;
+
+               classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
+               if (!HeapTupleIsValid(classtup))
+                   elog(ERROR, "cache lookup failed for operator class %u", opclass);
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("nondeterministic collations are not supported for operator class \"%s\"",
+                               NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
+               ReleaseSysCache(classtup);
+           }
+       }
+   }
+
    /*
     * Concurrent index build on a system catalog is unsafe because we tend to
     * release locks before committing in catalogs.
index 9d9439932308b179894b5bc3ebd2f4e10f5c236e..e63a4e553b2bfc5a2cbf001f116b6e1c12500ded 100644 (file)
@@ -1105,23 +1105,12 @@ hashbpcharextended(PG_FUNCTION_ARGS)
  */
 
 static int
-internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid)
+internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
 {
    int         result;
    int         len1,
                len2;
 
-   check_collation_set(collid);
-
-   /*
-    * see internal_text_pattern_compare()
-    */
-   if (!get_collation_isdeterministic(collid))
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
-                       "bpchar_pattern_ops")));
-
    len1 = bcTruelen(arg1);
    len2 = bcTruelen(arg2);
 
@@ -1144,7 +1133,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS)
    BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
    int         result;
 
-   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_bpchar_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -1160,7 +1149,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS)
    BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
    int         result;
 
-   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_bpchar_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -1176,7 +1165,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS)
    BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
    int         result;
 
-   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_bpchar_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -1192,7 +1181,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS)
    BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
    int         result;
 
-   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_bpchar_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -1208,7 +1197,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
    BpChar     *arg2 = PG_GETARG_BPCHAR_PP(1);
    int         result;
 
-   result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_bpchar_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -1221,17 +1210,8 @@ Datum
 btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
 {
    SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-   Oid         collid = ssup->ssup_collation;
    MemoryContext oldcontext;
 
-   check_collation_set(collid);
-
-   if (!get_collation_isdeterministic(collid))
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
-                       "bpchar_pattern_ops")));
-
    oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
    /* Use generic string SortSupport, forcing "C" collation */
index d36156f4e4d763f95f23e1ead265cb8c4391a809..2480074813d931dbc7e0b8da2ab9a4086a8e488e 100644 (file)
@@ -2996,34 +2996,12 @@ textgename(PG_FUNCTION_ARGS)
  */
 
 static int
-internal_text_pattern_compare(text *arg1, text *arg2, Oid collid)
+internal_text_pattern_compare(text *arg1, text *arg2)
 {
    int         result;
    int         len1,
                len2;
 
-   check_collation_set(collid);
-
-   /*
-    * XXX We cannot use a text_pattern_ops index for nondeterministic
-    * collations, because these operators intentionally ignore the collation.
-    * However, the planner has no way to know that, so it might choose such
-    * an index for an "=" clause, which would lead to wrong results.  This
-    * check here doesn't prevent choosing the index, but it will at least
-    * error out if the index is chosen.  A text_pattern_ops index on a column
-    * with nondeterministic collation is pretty useless anyway, since LIKE
-    * etc. won't work there either.  A future possibility would be to
-    * annotate the operator class or its members in the catalog to avoid the
-    * index.  Another alternative is to stay away from the *_pattern_ops
-    * operator classes and prefer creating LIKE-supporting indexes with
-    * COLLATE "C".
-    */
-   if (!get_collation_isdeterministic(collid))
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
-                       "text_pattern_ops")));
-
    len1 = VARSIZE_ANY_EXHDR(arg1);
    len2 = VARSIZE_ANY_EXHDR(arg2);
 
@@ -3046,7 +3024,7 @@ text_pattern_lt(PG_FUNCTION_ARGS)
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int         result;
 
-   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_text_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -3062,7 +3040,7 @@ text_pattern_le(PG_FUNCTION_ARGS)
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int         result;
 
-   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_text_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -3078,7 +3056,7 @@ text_pattern_ge(PG_FUNCTION_ARGS)
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int         result;
 
-   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_text_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -3094,7 +3072,7 @@ text_pattern_gt(PG_FUNCTION_ARGS)
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int         result;
 
-   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_text_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -3110,7 +3088,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
    text       *arg2 = PG_GETARG_TEXT_PP(1);
    int         result;
 
-   result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION());
+   result = internal_text_pattern_compare(arg1, arg2);
 
    PG_FREE_IF_COPY(arg1, 0);
    PG_FREE_IF_COPY(arg2, 1);
@@ -3123,17 +3101,8 @@ Datum
 bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
 {
    SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-   Oid         collid = ssup->ssup_collation;
    MemoryContext oldcontext;
 
-   check_collation_set(collid);
-
-   if (!get_collation_isdeterministic(collid))
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("nondeterministic collations are not supported for operator class \"%s\"",
-                       "text_pattern_ops")));
-
    oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
 
    /* Use generic string SortSupport, forcing "C" collation */
index 00cc71dcd12c857d409ebd79402fbb0425e924f3..318d99ce63bd08904b91d09845a4e1edae3d65c3 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201909071
+#define CATALOG_VERSION_NO 201909213
 
 #endif
index fdfea85efedb33a5fe4a992320bef0370babef9c..2d575102efaa8ef62d75ed7bc336b991dd876617 100644 (file)
   opcfamily => 'btree/datetime_ops', opcintype => 'timestamp' },
 { opcmethod => 'hash', opcname => 'timestamp_ops',
   opcfamily => 'hash/timestamp_ops', opcintype => 'timestamp' },
-{ opcmethod => 'btree', opcname => 'text_pattern_ops',
+{ oid => '4217', oid_symbol => 'TEXT_BTREE_PATTERN_OPS_OID',
+  opcmethod => 'btree', opcname => 'text_pattern_ops',
   opcfamily => 'btree/text_pattern_ops', opcintype => 'text',
   opcdefault => 'f' },
-{ opcmethod => 'btree', opcname => 'varchar_pattern_ops',
+{ oid => '4218', oid_symbol => 'VARCHAR_BTREE_PATTERN_OPS_OID',
+  opcmethod => 'btree', opcname => 'varchar_pattern_ops',
   opcfamily => 'btree/text_pattern_ops', opcintype => 'text',
   opcdefault => 'f' },
-{ opcmethod => 'btree', opcname => 'bpchar_pattern_ops',
+{ oid => '4219', oid_symbol => 'BPCHAR_BTREE_PATTERN_OPS_OID',
+  opcmethod => 'btree', opcname => 'bpchar_pattern_ops',
   opcfamily => 'btree/bpchar_pattern_ops', opcintype => 'bpchar',
   opcdefault => 'f' },
 { opcmethod => 'btree', opcname => 'money_ops', opcfamily => 'btree/money_ops',
index 96823cd59b4d52acf5de1b4acc93a1fc3af58685..fa7dc96ece62e9f1e45ff4d9c90c08b5e79aa589 100644 (file)
 
 { oid => '613', descr => 'distance between',
   oprname => '<->', oprleft => 'point', oprright => 'line',
-  oprresult => 'float8', oprcom => '<->(line,point)',oprcode => 'dist_pl' },
+  oprresult => 'float8', oprcom => '<->(line,point)', oprcode => 'dist_pl' },
 { oid => '760', descr => 'distance between',
   oprname => '<->', oprleft => 'line', oprright => 'point',
   oprresult => 'float8', oprcom => '<->(point,line)', oprcode => 'dist_lp' },
 { oid => '614', descr => 'distance between',
   oprname => '<->', oprleft => 'point', oprright => 'lseg',
-  oprresult => 'float8', oprcom => '<->(lseg,point)',oprcode => 'dist_ps' },
+  oprresult => 'float8', oprcom => '<->(lseg,point)', oprcode => 'dist_ps' },
 { oid => '761', descr => 'distance between',
   oprname => '<->', oprleft => 'lseg', oprright => 'point',
   oprresult => 'float8', oprcom => '<->(point,lseg)', oprcode => 'dist_sp' },