Allow Memoize to operate in binary comparison mode

author David Rowley <drowley@postgresql.org>

Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)

committer David Rowley <drowley@postgresql.org>

Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)
author David Rowley <drowley@postgresql.org>
Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)
committer David Rowley <drowley@postgresql.org>
Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out

index 786781db4b090a456a34a1d450232f4b9ce50ae0..5196e4797a6ccb6c737c90184f3939bb3052770d 100644 (file)
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2247,6 +2247,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
                 Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
           ->  Memoize
                 Cache Key: t1.c2
+               Cache Mode: binary
                 ->  Subquery Scan on q
                       ->  HashAggregate
                             Output: t2.c1, t3.c1
@@ -2255,7 +2256,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
                                   Output: t2.c1, t3.c1
                                   Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
                                   Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
-(16 rows)
+(17 rows)
  
  SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
   C 1 
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c

index 10644dfac44ef0aa65706747ebab024daf9dc056..09f5253abb9c14950c0ad3a9514dec668a9ff86d 100644 (file)
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3127,11 +3127,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
     if (es->format != EXPLAIN_FORMAT_TEXT)
     {
         ExplainPropertyText("Cache Key", keystr.data, es);
+       ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
     }
     else
     {
         ExplainIndentText(es);
         appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
+       ExplainIndentText(es);
+       appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
     }
  
     pfree(keystr.data);
diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c

index bec588b3a041e84ebe34764d05204293a5889f04..683502dd90e1b6f4444712725c454d599e65c602 100644 (file)
--- a/src/backend/executor/nodeMemoize.c
+++ b/src/backend/executor/nodeMemoize.c
@@ -71,6 +71,7 @@
  #include "executor/nodeMemoize.h"
  #include "lib/ilist.h"
  #include "miscadmin.h"
+#include "utils/datum.h"
  #include "utils/lsyscache.h"
  
  /* States of the ExecMemoize state machine */
@@ -131,7 +132,7 @@ typedef struct MemoizeEntry
  
  static uint32 MemoizeHash_hash(struct memoize_hash *tb,
                                const MemoizeKey *key);
-static int MemoizeHash_equal(struct memoize_hash *tb,
+static bool MemoizeHash_equal(struct memoize_hash *tb,
                               const MemoizeKey *params1,
                               const MemoizeKey *params2);
  
@@ -140,7 +141,7 @@ static int  MemoizeHash_equal(struct memoize_hash *tb,
  #define SH_KEY_TYPE MemoizeKey *
  #define SH_KEY key
  #define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key)
-#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0)
+#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b)
  #define SH_SCOPE static inline
  #define SH_STORE_HASH
  #define SH_GET_HASH(tb, a) a->hash
@@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
     TupleTableSlot *pslot = mstate->probeslot;
     uint32      hashkey = 0;
     int         numkeys = mstate->nkeys;
-   FmgrInfo   *hashfunctions = mstate->hashfunctions;
-   Oid        *collations = mstate->collations;
  
-   for (int i = 0; i < numkeys; i++)
+   if (mstate->binary_mode)
     {
-       /* rotate hashkey left 1 bit at each step */
-       hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+       for (int i = 0; i < numkeys; i++)
+       {
+           /* rotate hashkey left 1 bit at each step */
+           hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+           if (!pslot->tts_isnull[i])  /* treat nulls as having hash key 0 */
+           {
+               FormData_pg_attribute *attr;
+               uint32      hkey;
+
+               attr = &pslot->tts_tupleDescriptor->attrs[i];
+
+               hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen);
+
+               hashkey ^= hkey;
+           }
+       }
+   }
+   else
+   {
+       FmgrInfo   *hashfunctions = mstate->hashfunctions;
+       Oid        *collations = mstate->collations;
  
-       if (!pslot->tts_isnull[i])  /* treat nulls as having hash key 0 */
+       for (int i = 0; i < numkeys; i++)
         {
-           uint32      hkey;
+           /* rotate hashkey left 1 bit at each step */
+           hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+           if (!pslot->tts_isnull[i])  /* treat nulls as having hash key 0 */
+           {
+               uint32      hkey;
  
-           hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
-                                                   collations[i], pslot->tts_values[i]));
-           hashkey ^= hkey;
+               hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+                                                       collations[i], pslot->tts_values[i]));
+               hashkey ^= hkey;
+           }
         }
     }
  
@@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
   *     table lookup.  'key2' is never used.  Instead the MemoizeState's
   *     probeslot is always populated with details of what's being looked up.
   */
-static int
+static bool
  MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
                   const MemoizeKey *key2)
  {
@@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
     /* probeslot should have already been prepared by prepare_probe_slot() */
     ExecStoreMinimalTuple(key1->params, tslot, false);
  
-   econtext->ecxt_innertuple = tslot;
-   econtext->ecxt_outertuple = pslot;
-   return !ExecQualAndReset(mstate->cache_eq_expr, econtext);
+   if (mstate->binary_mode)
+   {
+       int         numkeys = mstate->nkeys;
+
+       slot_getallattrs(tslot);
+       slot_getallattrs(pslot);
+
+       for (int i = 0; i < numkeys; i++)
+       {
+           FormData_pg_attribute *attr;
+
+           if (tslot->tts_isnull[i] != pslot->tts_isnull[i])
+               return false;
+
+           /* both NULL? they're equal */
+           if (tslot->tts_isnull[i])
+               continue;
+
+           /* perform binary comparison on the two datums */
+           attr = &tslot->tts_tupleDescriptor->attrs[i];
+           if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i],
+                               attr->attbyval, attr->attlen))
+               return false;
+       }
+       return true;
+   }
+   else
+   {
+       econtext->ecxt_innertuple = tslot;
+       econtext->ecxt_outertuple = pslot;
+       return ExecQualAndReset(mstate->cache_eq_expr, econtext);
+   }
  }
  
  /*
@@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
      */
     mstate->singlerow = node->singlerow;
  
+   /*
+    * Record if the cache keys should be compared bit by bit, or logically
+    * using the type's hash equality operator
+    */
+   mstate->binary_mode = node->binary_mode;
+
     /* Zero the statistics counters */
     memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation));
  
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index ad1ea2ff2f833b3ad061c7edcbff9a27ce02f47c..7d55fd69ab342bbeb84c65e75791ab1e67ed0508 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -971,6 +971,7 @@ _copyMemoize(const Memoize *from)
     COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys);
     COPY_NODE_FIELD(param_exprs);
     COPY_SCALAR_FIELD(singlerow);
+   COPY_SCALAR_FIELD(binary_mode);
     COPY_SCALAR_FIELD(est_entries);
  
     return newnode;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 23f23f11dc79d8bc0464f0156c811b81aeb23c4a..be374a0d706c51cb12a913e8b5def3c4dc1be2e7 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -866,6 +866,7 @@ _outMemoize(StringInfo str, const Memoize *node)
     WRITE_OID_ARRAY(collations, node->numKeys);
     WRITE_NODE_FIELD(param_exprs);
     WRITE_BOOL_FIELD(singlerow);
+   WRITE_BOOL_FIELD(binary_mode);
     WRITE_UINT_FIELD(est_entries);
  }
  
@@ -1966,6 +1967,7 @@ _outMemoizePath(StringInfo str, const MemoizePath *node)
     WRITE_NODE_FIELD(hash_operators);
     WRITE_NODE_FIELD(param_exprs);
     WRITE_BOOL_FIELD(singlerow);
+   WRITE_BOOL_FIELD(binary_mode);
     WRITE_FLOAT_FIELD(calls, "%.0f");
     WRITE_UINT_FIELD(est_entries);
  }
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index abf08b7a2f3b19159511e1e9a8a514f1c57970f6..a82c53ec0d2ff1480ce900b80a944becc9c6f388 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2230,6 +2230,7 @@ _readMemoize(void)
     READ_OID_ARRAY(collations, local_node->numKeys);
     READ_NODE_FIELD(param_exprs);
     READ_BOOL_FIELD(singlerow);
+   READ_BOOL_FIELD(binary_mode);
     READ_UINT_FIELD(est_entries);
  
     READ_DONE();
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index 0f3ad8aa658b067665a0075a966da867ac80b4b3..322460e968ded34ec77ff5841d61855c70a42bfb 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root,
   *     Returns true the hashing is possible, otherwise return false.
   *
   * Additionally we also collect the outer exprs and the hash operators for
- * each parameter to innerrel.  These set in 'param_exprs' and 'operators'
- * when we return true.
+ * each parameter to innerrel.  These are set in 'param_exprs', 'operators'
+ * and 'binary_mode' when we return true.
   */
  static bool
  paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
                             RelOptInfo *outerrel, RelOptInfo *innerrel,
-                           List **param_exprs, List **operators)
+                           List **param_exprs, List **operators,
+                           bool *binary_mode)
  
  {
     ListCell   *lc;
  
     *param_exprs = NIL;
     *operators = NIL;
+   *binary_mode = false;
  
     if (param_info != NULL)
     {
@@ -431,6 +433,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
  
             *operators = lappend_oid(*operators, hasheqoperator);
             *param_exprs = lappend(*param_exprs, expr);
+
+           /*
+            * When the join operator is not hashable then it's possible that
+            * the operator will be able to distinguish something that the
+            * hash equality operator could not. For example with floating
+            * point types -0.0 and +0.0 are classed as equal by the hash
+            * function and equality function, but some other operator may be
+            * able to tell those values apart.  This means that we must put
+            * memoize into binary comparison mode so that it does bit-by-bit
+            * comparisons rather than a "logical" comparison as it would
+            * using the hash equality operator.
+            */
+           if (!OidIsValid(rinfo->hashjoinoperator))
+               *binary_mode = true;
         }
     }
  
@@ -461,6 +477,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
  
         *operators = lappend_oid(*operators, typentry->eq_opr);
         *param_exprs = lappend(*param_exprs, expr);
+
+       /*
+        * We must go into binary mode as we cannot tell how these lateral
+        * Vars are being used.  See the comment above where we set
+        * *binary_mode for the non-lateral Var case.  This could be relaxed
+        * a bit if we had the RestrictInfos and knew the operators being
+        * used; however, for cases like Vars that are arguments to
+        * functions we must operate in binary mode as we don't have
+        * visibility into what the function is doing with the Vars.
+        */
+       *binary_mode = true;
     }
  
     /* We're okay to use memoize */
@@ -481,6 +508,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
     List       *param_exprs;
     List       *hash_operators;
     ListCell   *lc;
+   bool        binary_mode;
  
     /* Obviously not if it's disabled */
     if (!enable_memoize)
@@ -572,7 +600,8 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
                                     outerrel,
                                     innerrel,
                                     &param_exprs,
-                                   &hash_operators))
+                                   &hash_operators,
+                                   &binary_mode))
     {
         return (Path *) create_memoize_path(root,
                                             innerrel,
@@ -580,6 +609,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
                                             param_exprs,
                                             hash_operators,
                                             extra->inner_unique,
+                                           binary_mode,
                                             outer_path->parent->rows);
     }
  
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 3dc0176a5169a5fb40b457971f17b42a3d980e63..866f19f64c1eaaacdbd154b2a3a467c6cf5fc0b0 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -279,7 +279,8 @@ static Sort *make_sort_from_groupcols(List *groupcls,
  static Material *make_material(Plan *lefttree);
  static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
                              Oid *collations, List *param_exprs,
-                            bool singlerow, uint32 est_entries);
+                            bool singlerow, bool binary_mode,
+                            uint32 est_entries);
  static WindowAgg *make_windowagg(List *tlist, Index winref,
                                  int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
                                  int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1617,7 +1618,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
     }
  
     plan = make_memoize(subplan, operators, collations, param_exprs,
-                       best_path->singlerow, best_path->est_entries);
+                       best_path->singlerow, best_path->binary_mode,
+                       best_path->est_entries);
  
     copy_generic_path_info(&plan->plan, (Path *) best_path);
  
@@ -6417,7 +6419,8 @@ materialize_finished_plan(Plan *subplan)
  
  static Memoize *
  make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
-            List *param_exprs, bool singlerow, uint32 est_entries)
+            List *param_exprs, bool singlerow, bool binary_mode,
+            uint32 est_entries)
  {
     Memoize    *node = makeNode(Memoize);
     Plan       *plan = &node->plan;
@@ -6432,6 +6435,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
     node->collations = collations;
     node->param_exprs = param_exprs;
     node->singlerow = singlerow;
+   node->binary_mode = binary_mode;
     node->est_entries = est_entries;
  
     return node;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index e53d381e199233b2d4a60229fddfe708475f21d5..af5e8df26b4e74894095c713209d75cc416d43a1 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1583,7 +1583,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
  MemoizePath *
  create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
                     List *param_exprs, List *hash_operators,
-                   bool singlerow, double calls)
+                   bool singlerow, bool binary_mode, double calls)
  {
     MemoizePath *pathnode = makeNode(MemoizePath);
  
@@ -1603,6 +1603,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
     pathnode->hash_operators = hash_operators;
     pathnode->param_exprs = param_exprs;
     pathnode->singlerow = singlerow;
+   pathnode->binary_mode = binary_mode;
     pathnode->calls = calls;
  
     /*
@@ -3942,6 +3943,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
                                                     mpath->param_exprs,
                                                     mpath->hash_operators,
                                                     mpath->singlerow,
+                                                   mpath->binary_mode,
                                                     mpath->calls);
             }
         default:
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c

index 6a317fc0a6d0cb95cf38d09fec3dd1dd68a68941..2f229395744f8c2566514b0dccc49a34b62c5107 100644 (file)
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -43,6 +43,7 @@
  #include "postgres.h"
  
  #include "access/detoast.h"
+#include "common/hashfn.h"
  #include "fmgr.h"
  #include "utils/builtins.h"
  #include "utils/datum.h"
@@ -324,6 +325,57 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
     return result;
  }
  
+/*-------------------------------------------------------------------------
+ * datum_image_hash
+ *
+ * Generate a hash value based on the binary representation of 'value'.  Most
+ * use cases will want to use the hash function specific to the Datum's type;
+ * however, some corner cases require generating a hash value based on the
+ * actual bits rather than the logical value.
+ *-------------------------------------------------------------------------
+ */
+uint32
+datum_image_hash(Datum value, bool typByVal, int typLen)
+{
+   Size        len;
+   uint32      result;
+
+   if (typByVal)
+       result = hash_bytes((unsigned char *) &value, sizeof(Datum));
+   else if (typLen > 0)
+       result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
+   else if (typLen == -1)
+   {
+       struct varlena *val;
+
+       len = toast_raw_datum_size(value);
+
+       val = PG_DETOAST_DATUM_PACKED(value);
+
+       result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
+
+       /* Only free memory if it's a copy made here. */
+       if ((Pointer) val != (Pointer) value)
+           pfree(val);
+   }
+   else if (typLen == -2)
+   {
+       char       *s;
+
+       s = DatumGetCString(value);
+       len = strlen(s) + 1;
+
+       result = hash_bytes((unsigned char *) s, len);
+   }
+   else
+   {
+       elog(ERROR, "unexpected typLen: %d", typLen);
+       result = 0;             /* keep compiler quiet */
+   }
+
+   return result;
+}
+
  /*-------------------------------------------------------------------------
   * btequalimage
   *
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 2e8cbee69ff6ebbcf080deed0fc68976150e7920..d96ace32e43bc7ef7987a5123142c9162f685641 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2109,6 +2109,8 @@ typedef struct MemoizeState
                                  * NULL if 'last_tuple' is NULL. */
     bool        singlerow;      /* true if the cache entry is to be marked as
                                  * complete after caching the first tuple. */
+   bool        binary_mode;    /* true when cache key should be compared bit
+                                * by bit, false when using hash equality ops */
     MemoizeInstrumentation stats;   /* execution statistics */
     SharedMemoizeInfo *shared_info; /* statistics for parallel workers */
  } MemoizeState;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index 186e89905b2bee92e9ec703caf945bcef2ecae09..324d92880b56c435d43e6952e8548c0a76be9c76 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1515,6 +1515,8 @@ typedef struct MemoizePath
     List       *param_exprs;    /* cache keys */
     bool        singlerow;      /* true if the cache entry is to be marked as
                                  * complete after caching the first record. */
+   bool        binary_mode;    /* true when cache key should be compared bit
+                                * by bit, false when using hash equality ops */
     Cardinality calls;          /* expected number of rescans */
     uint32      est_entries;    /* The maximum number of entries that the
                                  * planner expects will fit in the cache, or 0
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index 01a246d50ed6033158d169dddf56df872ef0f673..f1328be3549b69513f9aa37e0d9dbc7cc6906483 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -799,6 +799,8 @@ typedef struct Memoize
     bool        singlerow;      /* true if the cache entry should be marked as
                                  * complete after we store the first tuple in
                                  * it. */
+   bool        binary_mode;    /* true when cache key should be compared bit
+                                * by bit, false when using hash equality ops */
     uint32      est_entries;    /* The maximum number of entries that the
                                  * planner expects will fit in the cache, or 0
                                  * if unknown */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index f704d399809e84819490bd78eb5030549cb107da..2922c0cdc14691a89bb5da6ccb907dcf5cf2d7a4 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -88,6 +88,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root,
                                         List *param_exprs,
                                         List *hash_operators,
                                         bool singlerow,
+                                       bool binary_mode,
                                         double calls);
  extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
                                       Path *subpath, SpecialJoinInfo *sjinfo);
diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h

index d4cf62bed73229f28c1a4a2747b5e2eb10802e93..8a59f11006fb5c625843e28c9d0c5c5f230938db 100644 (file)
--- a/src/include/utils/datum.h
+++ b/src/include/utils/datum.h
@@ -55,6 +55,14 @@ extern bool datumIsEqual(Datum value1, Datum value2,
  extern bool datum_image_eq(Datum value1, Datum value2,
                            bool typByVal, int typLen);
  
+/*
+ * datum_image_hash
+ *
+ * Generates hash value for 'value' based on its bits rather than logical
+ * value.
+ */
+extern uint32 datum_image_hash(Datum value, bool typByVal, int typLen);
+
  /*
   * Serialize and restore datums so that we can transfer them to parallel
   * workers.
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 84331659e7d53de5f5ed74a48450d670501d00a9..d5b5b775fddc99f59e8b7eb6394f28dee1138582 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3686,9 +3686,10 @@ where t1.unique1 = 1;
                       Index Cond: (hundred = t1.hundred)
           ->  Memoize
                 Cache Key: t2.thousand
+               Cache Mode: logical
                 ->  Index Scan using tenk1_unique2 on tenk1 t3
                       Index Cond: (unique2 = t2.thousand)
-(13 rows)
+(14 rows)
  
  explain (costs off)
  select * from tenk1 t1 left join
@@ -3708,9 +3709,10 @@ where t1.unique1 = 1;
                       Index Cond: (hundred = t1.hundred)
           ->  Memoize
                 Cache Key: t2.thousand
+               Cache Mode: logical
                 ->  Index Scan using tenk1_unique2 on tenk1 t3
                       Index Cond: (unique2 = t2.thousand)
-(13 rows)
+(14 rows)
  
  explain (costs off)
  select count(*) from
@@ -4238,11 +4240,12 @@ where t1.f1 = ss.f1;
     ->  Memoize
           Output: (i8.q1), t2.f1
           Cache Key: i8.q1
+         Cache Mode: binary
           ->  Limit
                 Output: (i8.q1), t2.f1
                 ->  Seq Scan on public.text_tbl t2
                       Output: i8.q1, t2.f1
-(19 rows)
+(20 rows)
  
  select * from
    text_tbl t1
@@ -4282,6 +4285,7 @@ where t1.f1 = ss2.f1;
           ->  Memoize
                 Output: (i8.q1), t2.f1
                 Cache Key: i8.q1
+               Cache Mode: binary
                 ->  Limit
                       Output: (i8.q1), t2.f1
                       ->  Seq Scan on public.text_tbl t2
@@ -4289,11 +4293,12 @@ where t1.f1 = ss2.f1;
     ->  Memoize
           Output: ((i8.q1)), (t2.f1)
           Cache Key: (i8.q1), t2.f1
+         Cache Mode: binary
           ->  Limit
                 Output: ((i8.q1)), (t2.f1)
                 ->  Seq Scan on public.text_tbl t3
                       Output: (i8.q1), t2.f1
-(28 rows)
+(30 rows)
  
  select * from
    text_tbl t1
@@ -4342,6 +4347,7 @@ where tt1.f1 = ss1.c0;
     ->  Memoize
           Output: ss1.c0
           Cache Key: tt4.f1
+         Cache Mode: binary
           ->  Subquery Scan on ss1
                 Output: ss1.c0
                 Filter: (ss1.c0 = 'foo'::text)
@@ -4349,7 +4355,7 @@ where tt1.f1 = ss1.c0;
                       Output: (tt4.f1)
                       ->  Seq Scan on public.text_tbl tt5
                             Output: tt4.f1
-(32 rows)
+(33 rows)
  
  select 1 from
    text_tbl as tt1
@@ -5058,8 +5064,9 @@ explain (costs off)
           ->  Seq Scan on tenk1 a
           ->  Memoize
                 Cache Key: a.two
+               Cache Mode: binary
                 ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
  
  explain (costs off)
    select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
@@ -5070,8 +5077,9 @@ explain (costs off)
           ->  Seq Scan on tenk1 a
           ->  Memoize
                 Cache Key: a.two
+               Cache Mode: binary
                 ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
  
  -- don't need the explicit LATERAL keyword for functions
  explain (costs off)
@@ -5083,8 +5091,9 @@ explain (costs off)
           ->  Seq Scan on tenk1 a
           ->  Memoize
                 Cache Key: a.two
+               Cache Mode: binary
                 ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
  
  -- lateral with UNION ALL subselect
  explain (costs off)
@@ -5145,9 +5154,10 @@ explain (costs off)
                 ->  Values Scan on "*VALUES*"
           ->  Memoize
                 Cache Key: "*VALUES*".column1
+               Cache Mode: logical
                 ->  Index Only Scan using tenk1_unique2 on tenk1 b
                       Index Cond: (unique2 = "*VALUES*".column1)
-(9 rows)
+(10 rows)
  
  select count(*) from tenk1 a,
    tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out

index 9a025c4a7ab9eef644d780d3a21d1275e2e73e7d..0ed5d8474af090f61e1ded4205e49455702edb5a 100644 (file)
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -44,11 +44,12 @@ WHERE t2.unique1 < 1000;', false);
                 Rows Removed by Filter: 9000
           ->  Memoize (actual rows=1 loops=N)
                 Cache Key: t2.twenty
+               Cache Mode: logical
                 Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                 ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N)
                       Index Cond: (unique1 = t2.twenty)
                       Heap Fetches: N
-(11 rows)
+(12 rows)
  
  -- And check we get the expected results.
  SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
@@ -73,11 +74,12 @@ WHERE t1.unique1 < 1000;', false);
                 Rows Removed by Filter: 9000
           ->  Memoize (actual rows=1 loops=N)
                 Cache Key: t1.twenty
+               Cache Mode: logical
                 Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                 ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N)
                       Index Cond: (unique1 = t1.twenty)
                       Heap Fetches: N
-(11 rows)
+(12 rows)
  
  -- And check we get the expected results.
  SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
@@ -107,12 +109,94 @@ WHERE t2.unique1 < 1200;', true);
                 Rows Removed by Filter: 8800
           ->  Memoize (actual rows=1 loops=N)
                 Cache Key: t2.thousand
+               Cache Mode: logical
                 Hits: N  Misses: N  Evictions: N  Overflows: 0  Memory Usage: NkB
                 ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N)
                       Index Cond: (unique1 = t2.thousand)
                       Heap Fetches: N
-(11 rows)
+(12 rows)
  
+CREATE TABLE flt (f float);
+CREATE INDEX flt_f_idx ON flt (f);
+INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
+ANALYZE flt;
+SET enable_seqscan TO off;
+-- Ensure memoize operates in logical mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
+                                explain_memoize                                
+-------------------------------------------------------------------------------
+ Nested Loop (actual rows=4 loops=N)
+   ->  Index Only Scan using flt_f_idx on flt f1 (actual rows=2 loops=N)
+         Heap Fetches: N
+   ->  Memoize (actual rows=2 loops=N)
+         Cache Key: f1.f
+         Cache Mode: logical
+         Hits: 1  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N)
+               Index Cond: (f = f1.f)
+               Heap Fetches: N
+(10 rows)
+
+-- Ensure memoize operates in binary mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
+                                explain_memoize                                
+-------------------------------------------------------------------------------
+ Nested Loop (actual rows=4 loops=N)
+   ->  Index Only Scan using flt_f_idx on flt f1 (actual rows=2 loops=N)
+         Heap Fetches: N
+   ->  Memoize (actual rows=2 loops=N)
+         Cache Key: f1.f
+         Cache Mode: binary
+         Hits: 0  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N)
+               Index Cond: (f <= f1.f)
+               Heap Fetches: N
+(10 rows)
+
+DROP TABLE flt;
+-- Exercise Memoize in binary mode with a large fixed width type and a
+-- varlena type.
+CREATE TABLE strtest (n name, t text);
+CREATE INDEX strtest_n_idx ON strtest (n);
+CREATE INDEX strtest_t_idx ON strtest (t);
+INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100));
+-- duplicate rows so we get some cache hits
+INSERT INTO strtest SELECT * FROM strtest;
+ANALYZE strtest;
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
+                                 explain_memoize                                  
+----------------------------------------------------------------------------------
+ Nested Loop (actual rows=24 loops=N)
+   ->  Seq Scan on strtest s1 (actual rows=6 loops=N)
+   ->  Memoize (actual rows=4 loops=N)
+         Cache Key: s1.n
+         Cache Mode: binary
+         Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Scan using strtest_n_idx on strtest s2 (actual rows=4 loops=N)
+               Index Cond: (n <= s1.n)
+(8 rows)
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
+                                 explain_memoize                                  
+----------------------------------------------------------------------------------
+ Nested Loop (actual rows=24 loops=N)
+   ->  Seq Scan on strtest s1 (actual rows=6 loops=N)
+   ->  Memoize (actual rows=4 loops=N)
+         Cache Key: s1.t
+         Cache Mode: binary
+         Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Scan using strtest_t_idx on strtest s2 (actual rows=4 loops=N)
+               Index Cond: (t <= s1.t)
+(8 rows)
+
+DROP TABLE strtest;
+RESET enable_seqscan;
  RESET enable_mergejoin;
  RESET work_mem;
  RESET enable_bitmapscan;
@@ -140,9 +224,10 @@ WHERE t1.unique1 < 1000;
                                   Index Cond: (unique1 < 1000)
                       ->  Memoize
                             Cache Key: t1.twenty
+                           Cache Mode: logical
                             ->  Index Only Scan using tenk1_unique1 on tenk1 t2
                                   Index Cond: (unique1 = t1.twenty)
-(13 rows)
+(14 rows)
  
  -- And ensure the parallel plan gives us the correct results.
  SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out

index 07426260330378e7b2dd1c451619eeb8c320d243..4e8ddc70613a98d639e4dd2e2dd27e3f9249fdbb 100644 (file)
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1139,13 +1139,14 @@ where o.ten = 1;
                 Filter: (ten = 1)
           ->  Memoize
                 Cache Key: o.four
+               Cache Mode: binary
                 ->  CTE Scan on x
                       CTE x
                         ->  Recursive Union
                               ->  Result
                               ->  WorkTable Scan on x x_1
                                     Filter: (a < 10)
-(12 rows)
+(13 rows)
  
  select sum(o.four), sum(ss.a) from
    onek o cross join lateral (
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql

index 548cc3eee3020d9aa58fdef80ad1480247e5517e..3c7360adf9c8f74d2fb6ee9462b7d32ee06d3985 100644 (file)
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -65,6 +65,45 @@ SELECT explain_memoize('
  SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
  INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand
  WHERE t2.unique1 < 1200;', true);
+
+CREATE TABLE flt (f float);
+CREATE INDEX flt_f_idx ON flt (f);
+INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
+ANALYZE flt;
+
+SET enable_seqscan TO off;
+
+-- Ensure memoize operates in logical mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
+
+-- Ensure memoize operates in binary mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
+
+DROP TABLE flt;
+
+-- Exercise Memoize in binary mode with a large fixed width type and a
+-- varlena type.
+CREATE TABLE strtest (n name, t text);
+CREATE INDEX strtest_n_idx ON strtest (n);
+CREATE INDEX strtest_t_idx ON strtest (t);
+INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100));
+-- duplicate rows so we get some cache hits
+INSERT INTO strtest SELECT * FROM strtest;
+ANALYZE strtest;
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
+
+DROP TABLE strtest;
+
+RESET enable_seqscan;
  RESET enable_mergejoin;
  RESET work_mem;
  RESET enable_bitmapscan;
author	David Rowley <drowley@postgresql.org>
	Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)
committer	David Rowley <drowley@postgresql.org>
	Tue, 23 Nov 2021 21:06:59 +0000 (10:06 +1300)
contrib/postgres_fdw/expected/postgres_fdw.out		patch \| blob \| blame \| history
src/backend/commands/explain.c		patch \| blob \| blame \| history
src/backend/executor/nodeMemoize.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/readfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| blame \| history
src/backend/utils/adt/datum.c		patch \| blob \| blame \| history
src/include/nodes/execnodes.h		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/nodes/plannodes.h		patch \| blob \| blame \| history
src/include/optimizer/pathnode.h		patch \| blob \| blame \| history
src/include/utils/datum.h		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/expected/memoize.out		patch \| blob \| blame \| history
src/test/regress/expected/subselect.out		patch \| blob \| blame \| history
src/test/regress/sql/memoize.sql		patch \| blob \| blame \| history