Add a hash opclass for type "tid".

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)
diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c

index 41d540b46ecded139d59d6290ef53b9df2660f0d..7b25947682e80341e52dd6aec126de5340967fc0 100644 (file)
--- a/src/backend/utils/adt/tid.c
+++ b/src/backend/utils/adt/tid.c
@@ -20,6 +20,7 @@
  #include <math.h>
  #include <limits.h>
  
+#include "access/hash.h"
  #include "access/heapam.h"
  #include "access/sysattr.h"
  #include "catalog/namespace.h"
@@ -239,6 +240,33 @@ tidsmaller(PG_FUNCTION_ARGS)
     PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2);
  }
  
+Datum
+hashtid(PG_FUNCTION_ARGS)
+{
+   ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+
+   /*
+    * Hash function for tid (hash opclass support proc 1).  Pass hash_any
+    * the summed sizes of the component fields, not sizeof(ItemPointerData):
+    * a compiler that appends pad space to the struct would cause trouble
+    * elsewhere, but at least the length hashed here then excludes the pad.
+    */
+   return hash_any((unsigned char *) key,
+                   sizeof(BlockIdData) + sizeof(OffsetNumber));
+}
+
+Datum
+hashtidextended(PG_FUNCTION_ARGS)
+{
+   ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+   uint64      seed = PG_GETARG_INT64(1);
+
+   /* Seeded 64-bit variant (support proc 2); same length rule as hashtid */
+   return hash_any_extended((unsigned char *) key,
+                            sizeof(BlockIdData) + sizeof(OffsetNumber),
+                            seed);
+}
+
  
  /*
   * Functions to get latest tid of a specified tuple.
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 0e89b303650bf2eb94d999d5a31ea77281b2d73e..588a110093a6d85569462f059a7ba0ddd6f633a8 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201812202
+#define CATALOG_VERSION_NO 201812301
  
  #endif
diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat

index e689c9b160ae4898eba7b0210b06d435196720fd..436f1bd0762ddabd31b0c9d950c8e917b13112c7 100644 (file)
--- a/src/include/catalog/pg_amop.dat
+++ b/src/include/catalog/pg_amop.dat
@@ -1013,6 +1013,10 @@
  { amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid',
    amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' },
  
+# tid_ops
+{ amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid',
+  amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' },
+
  # text_pattern_ops
  { amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text',
    amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat

index bbcee26fa8b6929868df5d2ca8775135860e0d63..8ddb6991121a0e1662f0d8eef66c8d3a1dcd887f 100644 (file)
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -340,6 +340,10 @@
    amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
  { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
    amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
+{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+  amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
+{ amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+  amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
    amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat

index 5178d04337fa8ebab007cd7acb1d45e675ae9c58..c451d365a6f7e820d5055a1a8cc8c8bde0a3a513 100644 (file)
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -167,6 +167,8 @@
    opcintype => 'xid' },
  { opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops',
    opcintype => 'cid' },
+{ opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops',
+  opcintype => 'tid' },
  { opcmethod => 'hash', opcname => 'text_pattern_ops',
    opcfamily => 'hash/text_pattern_ops', opcintype => 'text',
    opcdefault => 'f' },
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat

index 2abd5311c3e5b59b25f329f3250da37413e6b446..e8452e10d21bb2782655786eb4ac4ca90b2537f6 100644 (file)
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -204,9 +204,10 @@
    oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
  
  { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
-  oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid',
-  oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)',
-  oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
+  oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid',
+  oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)',
+  oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel',
+  oprjoin => 'eqjoinsel' },
  { oid => '402', descr => 'not equal',
    oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
    oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat

index fe8a32485f2742c36293562363115f4363cb1de9..c5ea37b5cd759e0a85ae2544ff4b18d894d3cb91 100644 (file)
--- a/src/include/catalog/pg_opfamily.dat
+++ b/src/include/catalog/pg_opfamily.dat
@@ -112,6 +112,8 @@
    opfmethod => 'hash', opfname => 'xid_ops' },
  { oid => '2226',
    opfmethod => 'hash', opfname => 'cid_ops' },
+{ oid => '2227',
+  opfmethod => 'hash', opfname => 'tid_ops' },
  { oid => '2229',
    opfmethod => 'hash', opfname => 'text_pattern_ops' },
  { oid => '2231',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat

index acb0154048a0f0740b6a1eb8c5dfec4ecf11f5e2..6e1e1dfad71c64e55de4a52fe12537a9548d8eeb 100644 (file)
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2484,6 +2484,12 @@
  { oid => '2796', descr => 'smaller of two',
    proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid',
    prosrc => 'tidsmaller' },
+{ oid => '2233', descr => 'hash',
+  proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid',
+  prosrc => 'hashtid' },
+{ oid => '2234', descr => 'hash',
+  proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8',
+  prosrc => 'hashtidextended' },
  
  { oid => '1296',
    proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp',
diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out

index 8f15c04dad1aafa4ebeaa8f44886b44db0704d1a..9b5eb04bfd9c11a038c12d7fe72dbdf0b6d0e46c 100644 (file)
--- a/src/test/regress/expected/tidscan.out
+++ b/src/test/regress/expected/tidscan.out
@@ -109,6 +109,7 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
  (2 rows)
  
  -- nestloop-with-inner-tidscan joins on tid
+SET enable_hashjoin TO off;  -- otherwise hash join might win
  EXPLAIN (COSTS OFF)
  SELECT t1.ctid, t1.*, t2.ctid, t2.*
  FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
@@ -147,6 +148,7 @@ FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
   (0,1) |  1 | (0,1) |  1
  (1 row)
  
+RESET enable_hashjoin;
  -- exercise backward scan and rewind
  BEGIN;
  DECLARE c CURSOR FOR
@@ -231,4 +233,48 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ERROR:  cursor "c" is not positioned on a row
  ROLLBACK;
+-- bulk joins on CTID
+-- (these plans don't use TID scans, but this still seems like an
+-- appropriate place for these tests)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+               QUERY PLAN               
+----------------------------------------
+ Aggregate
+   ->  Hash Join
+         Hash Cond: (t1.ctid = t2.ctid)
+         ->  Seq Scan on tenk1 t1
+         ->  Hash
+               ->  Seq Scan on tenk1 t2
+(6 rows)
+
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ count 
+-------
+ 10000
+(1 row)
+
+SET enable_hashjoin TO off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+               QUERY PLAN                
+-----------------------------------------
+ Aggregate
+   ->  Merge Join
+         Merge Cond: (t1.ctid = t2.ctid)
+         ->  Sort
+               Sort Key: t1.ctid
+               ->  Seq Scan on tenk1 t1
+         ->  Sort
+               Sort Key: t2.ctid
+               ->  Seq Scan on tenk1 t2
+(9 rows)
+
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ count 
+-------
+ 10000
+(1 row)
+
+RESET enable_hashjoin;
  DROP TABLE tidscan;
diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql

index 2d63aa067fef88a9ef552a94123c2f7e06952a63..ef05c0984207919626a14d9ab5581f76d73bca8c 100644 (file)
--- a/src/test/regress/sql/tidscan.sql
+++ b/src/test/regress/sql/tidscan.sql
@@ -40,6 +40,7 @@ SELECT ctid, * FROM tidscan
  WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
  
  -- nestloop-with-inner-tidscan joins on tid
+SET enable_hashjoin TO off;  -- otherwise hash join might win
  EXPLAIN (COSTS OFF)
  SELECT t1.ctid, t1.*, t2.ctid, t2.*
  FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
@@ -50,6 +51,7 @@ SELECT t1.ctid, t1.*, t2.ctid, t2.*
  FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
  SELECT t1.ctid, t1.*, t2.ctid, t2.*
  FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
+RESET enable_hashjoin;
  
  -- exercise backward scan and rewind
  BEGIN;
@@ -80,4 +82,16 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ROLLBACK;
  
+-- bulk joins on CTID
+-- (these plans don't use TID scans, but this still seems like an
+-- appropriate place for these tests)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+SET enable_hashjoin TO off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+RESET enable_hashjoin;
+
  DROP TABLE tidscan;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 30 Dec 2018 20:40:04 +0000 (15:40 -0500)
src/backend/utils/adt/tid.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_amop.dat		patch \| blob \| blame \| history
src/include/catalog/pg_amproc.dat		patch \| blob \| blame \| history
src/include/catalog/pg_opclass.dat		patch \| blob \| blame \| history
src/include/catalog/pg_operator.dat		patch \| blob \| blame \| history
src/include/catalog/pg_opfamily.dat		patch \| blob \| blame \| history
src/include/catalog/pg_proc.dat		patch \| blob \| blame \| history
src/test/regress/expected/tidscan.out		patch \| blob \| blame \| history
src/test/regress/sql/tidscan.sql		patch \| blob \| blame \| history