From 5e97905a2c764d4ca36f5c6cccd0ebbf157b9df4 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 9 Dec 2021 08:36:59 +0530 Subject: [PATCH] Fix double publish of child table's data. We publish the child table's data twice for a publication that has both child and parent tables and is published with publish_via_partition_root as true. This happens because subscribers will initiate synchronization using both parent and child tables, since it gets both as separate tables in the initial table list. Ensure that pg_publication_tables returns only parent tables in such cases. Author: Hou Zhijie Reviewed-by: Greg Nancarrow, Amit Langote, Vignesh C, Amit Kapila Backpatch-through: 13 Discussion: https://postgr.es/m/OS0PR01MB57167F45D481F78CDC5986F794B99@OS0PR01MB5716.jpnprd01.prod.outlook.com --- src/backend/catalog/pg_publication.c | 41 ++++++++--------------- src/test/regress/expected/publication.out | 8 +++++ src/test/regress/sql/publication.sql | 4 +++ src/test/subscription/t/013_partition.pl | 18 +++++++--- 4 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 65db07f6024..62f10bcbd28 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -142,7 +142,7 @@ is_publishable_class(Oid relid, Form_pg_class reltuple) * the publication. */ static List * -filter_partitions(List *relids, List *schemarelids) +filter_partitions(List *relids) { List *result = NIL; ListCell *lc; @@ -161,16 +161,8 @@ filter_partitions(List *relids, List *schemarelids) { Oid ancestor = lfirst_oid(lc2); - /* - * Check if the parent table exists in the published table list. - * - * XXX As of now, we do this if the partition relation or the - * partition relation's ancestor is present in schema publication - * relations. - */ - if (list_member_oid(relids, ancestor) && - (list_member_oid(schemarelids, relid) || - list_member_oid(schemarelids, ancestor))) + /* Check if the parent table exists in the published table list. */ + if (list_member_oid(relids, ancestor)) { skip = true; break; @@ -913,22 +905,17 @@ pg_get_publication_tables(PG_FUNCTION_ARGS) PUBLICATION_PART_ROOT : PUBLICATION_PART_LEAF); tables = list_concat_unique_oid(relids, schemarelids); - if (schemarelids && publication->pubviaroot) - { - /* - * If the publication publishes partition changes via their - * respective root partitioned tables, we must exclude - * partitions in favor of including the root partitioned - * tables. Otherwise, the function could return both the child - * and parent tables which could cause data of the child table - * to be double-published on the subscriber side. - * - * XXX As of now, we do this when a publication has associated - * schema or for all tables publication. See - * GetAllTablesPublicationRelations(). - */ - tables = filter_partitions(tables, schemarelids); - } + + /* + * If the publication publishes partition changes via their + * respective root partitioned tables, we must exclude partitions + * in favor of including the root partitioned tables. Otherwise, + * the function could return both the child and parent tables + * which could cause data of the child table to be + * double-published on the subscriber side. + */ + if (publication->pubviaroot) + tables = filter_partitions(tables); } funcctx->user_fctx = (void *) tables; diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index c096fbdac58..5ac2d666a20 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -829,6 +829,14 @@ SELECT * FROM pg_publication_tables; pub | sch2 | tbl1_part1 (1 row) +-- Table publication that includes both the parent table and the child table +ALTER PUBLICATION pub ADD TABLE sch1.tbl1; +SELECT * FROM pg_publication_tables; + pubname | schemaname | tablename +---------+------------+----------- + pub | sch1 | tbl1 +(1 row) + DROP PUBLICATION pub; -- Schema publication that does not include the schema that has the parent table CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0); diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 06628825444..56dd358554a 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -474,6 +474,10 @@ DROP PUBLICATION pub; CREATE PUBLICATION pub FOR TABLE sch2.tbl1_part1 WITH (PUBLISH_VIA_PARTITION_ROOT=1); SELECT * FROM pg_publication_tables; +-- Table publication that includes both the parent table and the child table +ALTER PUBLICATION pub ADD TABLE sch1.tbl1; +SELECT * FROM pg_publication_tables; + DROP PUBLICATION pub; -- Schema publication that does not include the schema that has the parent table CREATE PUBLICATION pub FOR ALL TABLES IN SCHEMA sch2 WITH (PUBLISH_VIA_PARTITION_ROOT=0); diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index c75a07d6b35..6005178a325 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -6,7 +6,7 @@ use strict; use warnings; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 62; +use Test::More tests => 63; # setup @@ -412,11 +412,16 @@ $node_publisher->safe_psql('postgres', $node_publisher->safe_psql('postgres', "ALTER PUBLICATION pub_all SET (publish_via_partition_root = true)"); # Note: tab3_1's parent is not in the publication, in which case its -# changes are published using own identity. +# changes are published using own identity. For tab2, even though both parent +# and child tables are present but changes will be replicated via the parent's +# identity and only once. $node_publisher->safe_psql('postgres', - "CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab3_1 WITH (publish_via_partition_root = true)" + "CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab2_1, tab3_1 WITH (publish_via_partition_root = true)" ); +# prepare data for the initial sync +$node_publisher->safe_psql('postgres', "INSERT INTO tab2 VALUES (1)"); + # subscriber 1 $node_subscriber1->safe_psql('postgres', "DROP SUBSCRIPTION sub1"); $node_subscriber1->safe_psql('postgres', @@ -468,12 +473,17 @@ $node_subscriber1->poll_query_until('postgres', $synced_query) $node_subscriber2->poll_query_until('postgres', $synced_query) or die "Timed out while waiting for subscriber to synchronize data"; +# check that data is synced correctly +$result = $node_subscriber1->safe_psql('postgres', + "SELECT c, a FROM tab2"); +is( $result, qq(sub1_tab2|1), 'initial data synced for pub_viaroot'); + # insert $node_publisher->safe_psql('postgres', "INSERT INTO tab1 VALUES (1), (0)"); $node_publisher->safe_psql('postgres', "INSERT INTO tab1_1 (a) VALUES (3)"); $node_publisher->safe_psql('postgres', "INSERT INTO tab1_2 VALUES (5)"); $node_publisher->safe_psql('postgres', - "INSERT INTO tab2 VALUES (1), (0), (3), (5)"); + "INSERT INTO tab2 VALUES (0), (3), (5)"); $node_publisher->safe_psql('postgres', "INSERT INTO tab3 VALUES (1), (0), (3), (5)"); -- 2.30.2