node->ss.ss_currentRelation,
filename,
is_program,
+ NULL,
NIL,
options);
node->ss.ss_currentRelation,
festate->filename,
festate->is_program,
+ NULL,
NIL,
festate->options);
}
/*
* Create CopyState from FDW options.
*/
- cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NIL, options);
+ cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NULL, NIL,
+ options);
/*
* Use per-tuple memory context to prevent leak of memory used to read
<entry>logical replication subscriptions</entry>
</row>
+ <row>
+ <entry><link linkend="catalog-pg-subscription-rel"><structname>pg_subscription_rel</structname></link></entry>
+ <entry>relation state for subscriptions</entry>
+ </row>
+
<row>
<entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
<entry>tablespaces within this database cluster</entry>
</table>
</sect1>
+ <sect1 id="catalog-pg-subscription-rel">
+ <title><structname>pg_subscription_rel</structname></title>
+
+ <indexterm zone="catalog-pg-subscription-rel">
+ <primary>pg_subscription_rel</primary>
+ </indexterm>
+
+ <para>
+ The catalog <structname>pg_subscription_rel</structname> contains the
+ state for each replicated relation in each subscription. This is a
+ many-to-many mapping.
+ </para>
+
+ <para>
+ This catalog only contains tables known to the subscription after running
+ either <command>CREATE SUBSCRIPTION</command> or
+ <command>ALTER SUBSCRIPTION ... REFRESH</command>.
+ </para>
+
+ <table>
+ <title><structname>pg_subscription_rel</structname> Columns</title>
+
+ <tgroup cols="4">
+ <thead>
+ <row>
+ <entry>Name</entry>
+ <entry>Type</entry>
+ <entry>References</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry><structfield>srsubid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry><literal><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link>.oid</literal></entry>
+ <entry>Reference to subscription</entry>
+ </row>
+
+ <row>
+ <entry><structfield>srrelid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+ <entry>Reference to relation</entry>
+ </row>
+
+ <row>
+ <entry><structfield>srsubstate</structfield></entry>
+ <entry><type>char</type></entry>
+ <entry></entry>
+ <entry>
+ State code:
+ <literal>i</> = initialize,
+ <literal>d</> = data is being copied,
+ <literal>s</> = synchronized,
+ <literal>r</> = ready (normal replication)
+ </entry>
+ </row>
+
+ <row>
+ <entry><structfield>srsublsn</structfield></entry>
+ <entry><type>pg_lsn</type></entry>
+ <entry></entry>
+ <entry>
+ End LSN for <literal>s</> and <literal>r</> states.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </sect1>
+
<sect1 id="catalog-pg-tablespace">
<title><structname>pg_tablespace</structname></title>
</listitem>
</varlistentry>
+ <varlistentry id="guc-max-sync-workers-per-subscription" xreflabel="max_sync_workers_per_subscription">
+ <term><varname>max_sync_workers_per_subscription</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>max_sync_workers_per_subscription</> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Maximum number of synchronization workers per subscription. This
+ parameter controls the amount of parallelism of the initial data copy
+ during the subscription initialization or when new tables are added.
+ </para>
+ <para>
+ Currently, there can be only one synchronization worker per table.
+ </para>
+ <para>
+ The synchronization workers are taken from the pool defined by
+ <varname>max_logical_replication_workers</varname>.
+ </para>
+ <para>
+ The default value is 2.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
</para>
<para>
- Logical replication sends changes on the publisher to the subscriber as
- they occur in real-time. The subscriber applies the data in the same order
- as the publisher so that transactional consistency is guaranteed for
+ Logical replication of a table typically starts with taking a snapshot
+ of the data on the publisher database and copying that to the subscriber.
+ Once that is done, the changes on the publisher are sent to the subscriber
+ as they occur in real-time. The subscriber applies the data in the same
+ order as the publisher so that transactional consistency is guaranteed for
publications within a single subscription. This method of data replication
is sometimes referred to as transactional replication.
</para>
<para>
Each subscription will receive changes via one replication slot (see
- <xref linkend="streaming-replication-slots">).
+ <xref linkend="streaming-replication-slots">). Additional temporary
+ replication slots may be required for the initial data synchronization
+ of pre-existing table data.
</para>
<para>
to <literal>replica</literal>, which produces the usual effects on triggers
and constraints.
</para>
+
+ <sect2 id="logical-replication-snapshot">
+ <title>Initial Snapshot</title>
+ <para>
+ The initial data in existing subscribed tables are snapshotted and
+ copied in a parallel instance of a special kind of apply process.
+ This process will create its own temporary replication slot and
+ copy the existing data. Once existing data is copied, the worker
+ enters synchronization mode, which ensures that the table is brought
+ up to a synchronized state with the main apply process by streaming
+ any changes that happened during the initial data copy using standard
+ logical replication. Once the synchronization is done, the control
+ of the replication of the table is given back to the main apply
+ process where the replication continues as normal.
+ </para>
+ </sect2>
</sect1>
- <sect1 id="logical-replication-monitoring">
+ <sect1 id="logical-replication-monitoring">
<title>Monitoring</title>
<para>
<para>
Normally, there is a single apply process running for an enabled
subscription. A disabled subscription or a crashed subscription will have
- zero rows in this view.
+ zero rows in this view. If the initial data synchronization of any
+ table is in progress, there will be additional workers for the tables
+ being synchronized.
</para>
</sect1>
<para>
On the publisher side, <varname>wal_level</varname> must be set to
<literal>logical</literal>, and <varname>max_replication_slots</varname>
- must be set to at least the number of subscriptions expected to connect.
- And <varname>max_wal_senders</varname> should be set to at least the same
- as <varname>max_replication_slots</varname> plus the number of physical replicas
- that are connected at the same time.
+ must be set to at least the number of subscriptions expected to connect,
+ plus some reserve for table synchronization. And
+ <varname>max_wal_senders</varname> should be set to at least the same as
+ <varname>max_replication_slots</varname> plus the number of physical
+ replicas that are connected at the same time.
</para>
<para>
to be set. In this case it should be set to at least the number of
subscriptions that will be added to the subscriber.
<varname>max_logical_replication_workers</varname> must be set to at
- least the number of subscriptions. Additionally the
- <varname>max_worker_processes</varname> may need to be adjusted to
- accommodate for replication workers, at least
+ least the number of subscriptions, again plus some reserve for the table
+ synchronization. Additionally the <varname>max_worker_processes</varname>
+ may need to be adjusted to accommodate for replication workers, at least
(<varname>max_logical_replication_workers</varname>
+ <literal>1</literal>). Note that some extensions and parallel queries
also take worker slots from <varname>max_worker_processes</varname>.
</para>
<para>
- The above will start the replication process of changes to
- <literal>users</literal> and <literal>departments</literal> tables.
+ The above will start the replication process, which synchronizes the
+ initial table contents of the tables <literal>users</literal> and
+ <literal>departments</literal> and then starts replicating
+ incremental changes to those tables.
</para>
</sect1>
</chapter>
<entry><type>integer</></entry>
<entry>Process ID of the subscription worker process</entry>
</row>
+ <row>
+ <entry><structfield>relid</></entry>
+ <entry><type>oid</></entry>
+ <entry>OID of the relation that the worker is synchronizing; null for the
+ main apply worker</entry>
+ </row>
<row>
<entry><structfield>received_lsn</></entry>
<entry><type>pg_lsn</></entry>
<para>
The <structname>pg_stat_subscription</structname> view will contain one
row per subscription for main worker (with null PID if the worker is
- not running).
+ not running), and additional rows for workers handling the initial data
+ copy of the subscribed tables.
</para>
<table id="pg-stat-ssl-view" xreflabel="pg_stat_ssl">
</varlistentry>
<varlistentry id="protocol-replication-create-slot" xreflabel="CREATE_REPLICATION_SLOT">
- <term><literal>CREATE_REPLICATION_SLOT</literal> <replaceable class="parameter">slot_name</> [ <literal>TEMPORARY</> ] { <literal>PHYSICAL</> [ <literal>RESERVE_WAL</> ] | <literal>LOGICAL</> <replaceable class="parameter">output_plugin</> [ <literal>EXPORT_SNAPSHOT</> | <literal>NOEXPORT_SNAPSHOT</> ] }
+ <term><literal>CREATE_REPLICATION_SLOT</literal> <replaceable class="parameter">slot_name</> [ <literal>TEMPORARY</> ] { <literal>PHYSICAL</> [ <literal>RESERVE_WAL</> ] | <literal>LOGICAL</> <replaceable class="parameter">output_plugin</> [ <literal>EXPORT_SNAPSHOT</> | <literal>NOEXPORT_SNAPSHOT</> | <literal>USE_SNAPSHOT</> ] }
<indexterm><primary>CREATE_REPLICATION_SLOT</primary></indexterm>
</term>
<listitem>
<varlistentry>
<term><literal>EXPORT_SNAPSHOT</></term>
<term><literal>NOEXPORT_SNAPSHOT</></term>
+ <term><literal>USE_SNAPSHOT</></term>
<listitem>
<para>
Decides what to do with the snapshot created during logical slot
initialization. <literal>EXPORT_SNAPSHOT</>, which is the default,
will export the snapshot for use in other sessions. This option can't
- be used inside a transaction. <literal>NOEXPORT_SNAPSHOT</> will
+ be used inside a transaction. <literal>USE_SNAPSHOT</> will use the
+ snapshot for the current transaction executing the command. This
+ option must be used in a transaction, and
+ <literal>CREATE_REPLICATION_SLOT</literal> must be the first command
+ run in that transaction. Finally, <literal>NOEXPORT_SNAPSHOT</> will
just use the snapshot for logical decoding as normal but won't do
anything else with it.
</para>
<refsynopsisdiv>
<synopsis>
-ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> WITH ( <replaceable class="PARAMETER">option</replaceable> [, ... ] ) ]
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> WITH ( <replaceable class="PARAMETER">suboption</replaceable> [, ... ] ) ]
-<phrase>where <replaceable class="PARAMETER">option</replaceable> can be:</phrase>
+<phrase>where <replaceable class="PARAMETER">suboption</replaceable> can be:</phrase>
- SLOT NAME = <replaceable class="PARAMETER">slot_name</replaceable>
+ SLOT NAME = <replaceable class="PARAMETER">slot_name</replaceable>
+
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> SET PUBLICATION <replaceable class="PARAMETER">publication_name</replaceable> [, ...] { REFRESH WITH ( <replaceable class="PARAMETER">puboption</replaceable> [, ... ] ) | NOREFRESH }
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> REFRESH PUBLICATION WITH ( <replaceable class="PARAMETER">puboption</replaceable> [, ... ] )
+
+<phrase>where <replaceable class="PARAMETER">puboption</replaceable> can be:</phrase>
+
+ COPY DATA | NOCOPY DATA
ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> OWNER TO { <replaceable>new_owner</replaceable> | CURRENT_USER | SESSION_USER }
ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> CONNECTION '<replaceable>conninfo</replaceable>'
-ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> SET PUBLICATION <replaceable>publication_name</replaceable> [, ...]
ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> ENABLE
ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> DISABLE
</synopsis>
<varlistentry>
<term><literal>CONNECTION '<replaceable class="parameter">conninfo</replaceable>'</literal></term>
- <term><literal>SET PUBLICATION <replaceable class="parameter">publication_name</replaceable></literal></term>
<term><literal>SLOT NAME = <replaceable class="parameter">slot_name</replaceable></literal></term>
<listitem>
<para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>SET PUBLICATION <replaceable class="parameter">publication_name</replaceable></literal></term>
+ <listitem>
+ <para>
+ Changes the list of subscribed publications. See
+ <xref linkend="SQL-CREATESUBSCRIPTION"> for more information.
+ </para>
+ <para>
+ When <literal>REFRESH</literal> is specified, this command will also
+ act like <literal>REFRESH PUBLICATION</literal>. When
+ <literal>NOREFRESH</literal> is specified, the command will not try to
+ refresh table information.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>REFRESH PUBLICATION</term>
+ <listitem>
+ <para>
+ Fetch missing table information from publisher. This will start
+ replication of tables that were added to the subscribed-to publications
+ since the last invocation of <command>REFRESH PUBLICATION</command> or
+ since <command>CREATE SUBSCRIPTION</command>.
+ </para>
+ <para>
+ The <literal>COPY DATA</literal> and <literal>NOCOPY DATA</literal>
+ options specify if the existing data in the publications that are being
+ subscribed to should be copied. <literal>COPY DATA</literal> is the
+ default.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>ENABLE</literal></term>
<listitem>
</para>
</listitem>
</varlistentry>
+
</variablelist>
</refsect1>
| ENABLED | DISABLED
| CREATE SLOT | NOCREATE SLOT
| SLOT NAME = <replaceable class="PARAMETER">slot_name</replaceable>
+ | COPY DATA | NOCOPY DATA
+ | NOCONNECT
</synopsis>
</refsynopsisdiv>
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>COPY DATA</term>
+ <term>NOCOPY DATA</term>
+ <listitem>
+ <para>
+ Specifies if the existing data in the publications that are being
+ subscribed to should be copied once the replication starts.
+ <literal>COPY DATA</literal> is the default.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>NOCONNECT</term>
+ <listitem>
+ <para>
+ Instructs <command>CREATE SUBSCRIPTION</command> to skip the initial
+ connection to the publisher. This will change default values of other
+ options to <literal>DISABLED</literal>,
+ <literal>NOCREATE SLOT</literal>, and <literal>NOCOPY DATA</literal>.
+ </para>
+ <para>
+ It's not allowed to combine <literal>NOCONNECT</literal> and
+ <literal>ENABLED</literal>, <literal>CREATE SLOT</literal>, or
+ <literal>COPY DATA</literal>.
+ </para>
+ <para>
+ Since no connection is made when this option is specified, the tables
+ are not subscribed, so after you enable the subscription nothing will
+ be replicated. It is required to run
+ <literal>ALTER SUBSCRIPTION ... REFRESH PUBLICATION</> in order for
+ tables to be subscribed.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
</varlistentry>
<varlistentry>
- <term><option>--no-create-subscription-slots</option></term>
+ <term><option>--no-security-labels</option></term>
<listitem>
<para>
- When dumping logical replication subscriptions,
- generate <command>CREATE SUBSCRIPTION</command> commands that do not
- create the remote replication slot. That way, the dump can be
- restored without requiring network access to the remote servers.
+ Do not dump security labels.
</para>
</listitem>
</varlistentry>
<varlistentry>
- <term><option>--no-security-labels</option></term>
+ <term><option>--no-subscription-connect</option></term>
<listitem>
<para>
- Do not dump security labels.
+ When dumping logical replication subscriptions,
+ generate <command>CREATE SUBSCRIPTION</command> commands that do not
+ make remote connections for creating a replication slot or for the
+ initial table copy. That way, the dump can be restored without
+ requiring network access to the remote servers.
</para>
</listitem>
</varlistentry>
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \
+ pg_subscription_rel.h toasting.h indexing.h \
toasting.h indexing.h \
)
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "catalog/pg_type_fn.h"
*/
relation_close(rel, NoLock);
+ /*
+ * Remove any associated relation synchronization states.
+ */
+ RemoveSubscriptionRel(InvalidOid, relid);
+
/*
* Forget any ON COMMIT action for the rel
*/
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(pubid));
- scan = systable_beginscan(pubrelsrel, PublicationRelMapIndexId, true,
- NULL, 1, &scankey);
+ scan = systable_beginscan(pubrelsrel, PublicationRelPrrelidPrpubidIndexId,
+ true, NULL, 1, &scankey);
result = NIL;
while (HeapTupleIsValid(tup = systable_getnext(scan)))
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/xact.h"
+#include "catalog/indexing.h"
#include "catalog/pg_type.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
#include "nodes/makefuncs.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
+#include "utils/pg_lsn.h"
+#include "utils/rel.h"
#include "utils/syscache.h"
return res;
}
+
+/*
+ * Set the state of a subscription table.
+ *
+ * Looks up the pg_subscription_rel row for (relid, subid).  If no row
+ * exists a new one is inserted; otherwise the existing row's srsubstate
+ * and srsublsn columns are overwritten.
+ *
+ * subid   - OID of the subscription
+ * relid   - OID of the relation
+ * state   - new state code to store in srsubstate
+ * sublsn  - LSN to store in srsublsn; InvalidXLogRecPtr is stored as NULL
+ *
+ * Returns the value produced by CatalogTupleInsert() for a new row, or
+ * HeapTupleGetOid() of the updated tuple for an existing row.
+ */
+Oid
+SetSubscriptionRelState(Oid subid, Oid relid, char state,
+ XLogRecPtr sublsn)
+{
+ Relation rel;
+ HeapTuple tup;
+ Oid subrelid;
+ bool nulls[Natts_pg_subscription_rel];
+ Datum values[Natts_pg_subscription_rel];
+
+ /* Prevent concurrent changes. */
+ rel = heap_open(SubscriptionRelRelationId, ShareRowExclusiveLock);
+
+ /* Try finding existing mapping. */
+ /* A private copy is fetched because the update path modifies the tuple. */
+ tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP,
+ ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(subid));
+
+ /*
+ * If the record for given table does not exist yet create new
+ * record, otherwise update the existing one.
+ */
+ if (!HeapTupleIsValid(tup))
+ {
+ /* Form the tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ values[Anum_pg_subscription_rel_srsubid - 1] = ObjectIdGetDatum(subid);
+ values[Anum_pg_subscription_rel_srrelid - 1] = ObjectIdGetDatum(relid);
+ values[Anum_pg_subscription_rel_srsubstate - 1] = CharGetDatum(state);
+ /* An invalid LSN is represented as SQL NULL in the catalog row. */
+ if (sublsn != InvalidXLogRecPtr)
+ values[Anum_pg_subscription_rel_srsublsn - 1] = LSNGetDatum(sublsn);
+ else
+ nulls[Anum_pg_subscription_rel_srsublsn - 1] = true;
+
+ tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+ /* Insert tuple into catalog. */
+ subrelid = CatalogTupleInsert(rel, tup);
+
+ heap_freetuple(tup);
+ }
+ else
+ {
+ bool replaces[Natts_pg_subscription_rel];
+
+ /* Update the tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ /* Only the state and LSN columns are replaced; the key columns stay. */
+ replaces[Anum_pg_subscription_rel_srsubstate - 1] = true;
+ values[Anum_pg_subscription_rel_srsubstate - 1] = CharGetDatum(state);
+
+ replaces[Anum_pg_subscription_rel_srsublsn - 1] = true;
+ if (sublsn != InvalidXLogRecPtr)
+ values[Anum_pg_subscription_rel_srsublsn - 1] = LSNGetDatum(sublsn);
+ else
+ nulls[Anum_pg_subscription_rel_srsublsn - 1] = true;
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
+
+ /* Update the catalog. */
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ subrelid = HeapTupleGetOid(tup);
+ }
+
+ /* Cleanup. */
+ /* NoLock: the relation is closed without releasing the lock taken above. */
+ heap_close(rel, NoLock);
+
+ return subrelid;
+}
+
+/*
+ * Get state of subscription table.
+ *
+ * subid      - OID of the subscription
+ * relid      - OID of the relation
+ * sublsn     - output: the row's srsublsn, or InvalidXLogRecPtr when that
+ *              column is NULL or when no row was found
+ * missing_ok - if false, a missing row raises an ERROR
+ *
+ * Returns the row's srsubstate value.
+ * Returns SUBREL_STATE_UNKNOWN when not found and missing_ok is true.
+ */
+char
+GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn,
+ bool missing_ok)
+{
+ Relation rel;
+ HeapTuple tup;
+ char substate;
+ bool isnull;
+ Datum d;
+
+ rel = heap_open(SubscriptionRelRelationId, AccessShareLock);
+
+ /* Try finding the mapping. */
+ tup = SearchSysCache2(SUBSCRIPTIONRELMAP,
+ ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(subid));
+
+ if (!HeapTupleIsValid(tup))
+ {
+ if (missing_ok)
+ {
+ /* Nothing to release from the syscache; just drop the relation. */
+ heap_close(rel, AccessShareLock);
+ *sublsn = InvalidXLogRecPtr;
+ return SUBREL_STATE_UNKNOWN;
+ }
+
+ elog(ERROR, "subscription table %u in subscription %u does not exist",
+ relid, subid);
+ }
+
+ /* Get the state. */
+ d = SysCacheGetAttr(SUBSCRIPTIONRELMAP, tup,
+ Anum_pg_subscription_rel_srsubstate, &isnull);
+ Assert(!isnull);
+ substate = DatumGetChar(d);
+ /* The LSN column may be NULL; report that as InvalidXLogRecPtr. */
+ d = SysCacheGetAttr(SUBSCRIPTIONRELMAP, tup,
+ Anum_pg_subscription_rel_srsublsn, &isnull);
+ if (isnull)
+ *sublsn = InvalidXLogRecPtr;
+ else
+ *sublsn = DatumGetLSN(d);
+
+ /* Cleanup */
+ ReleaseSysCache(tup);
+ heap_close(rel, AccessShareLock);
+
+ return substate;
+}
+
+/*
+ * Drop subscription relation mapping. These can be for a particular
+ * subscription, or for a particular relation, or both.
+ *
+ * subid - subscription OID to match, or InvalidOid to not filter on it
+ * relid - relation OID to match, or InvalidOid to not filter on it
+ *
+ * Every pg_subscription_rel row matching the supplied keys is deleted.
+ * NOTE(review): when both arguments are InvalidOid no scan key is built,
+ * so the scan appears to be unrestricted -- confirm callers never do that.
+ */
+void
+RemoveSubscriptionRel(Oid subid, Oid relid)
+{
+ Relation rel;
+ HeapScanDesc scan;
+ ScanKeyData skey[2];
+ HeapTuple tup;
+ int nkeys = 0;
+
+ /* Prevent concurrent changes (see SetSubscriptionRelState()). */
+ rel = heap_open(SubscriptionRelRelationId, ShareRowExclusiveLock);
+
+ /* Add a key on srsubid only when a subscription was specified. */
+ if (OidIsValid(subid))
+ {
+ ScanKeyInit(&skey[nkeys++],
+ Anum_pg_subscription_rel_srsubid,
+ BTEqualStrategyNumber,
+ F_OIDEQ,
+ ObjectIdGetDatum(subid));
+ }
+
+ /* Add a key on srrelid only when a relation was specified. */
+ if (OidIsValid(relid))
+ {
+ ScanKeyInit(&skey[nkeys++],
+ Anum_pg_subscription_rel_srrelid,
+ BTEqualStrategyNumber,
+ F_OIDEQ,
+ ObjectIdGetDatum(relid));
+ }
+
+ /* Do the search and delete what we found. */
+ scan = heap_beginscan_catalog(rel, nkeys, skey);
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+ {
+ simple_heap_delete(rel, &tup->t_self);
+ }
+ heap_endscan(scan);
+
+ heap_close(rel, ShareRowExclusiveLock);
+}
+
+
+/*
+ * Get all relations for subscription.
+ *
+ * Scans pg_subscription_rel for rows whose srsubid equals subid and
+ * returns a List of SubscriptionRelState structs (relid, state, lsn), one
+ * per matching row.  The lsn field is InvalidXLogRecPtr for rows whose
+ * srsublsn column is NULL.
+ *
+ * Returned list is palloced in current memory context.
+ */
+List *
+GetSubscriptionRelations(Oid subid)
+{
+ List *res = NIL;
+ Relation rel;
+ HeapTuple tup;
+ int nkeys = 0;
+ ScanKeyData skey[2];
+ SysScanDesc scan;
+
+ rel = heap_open(SubscriptionRelRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey[nkeys++],
+ Anum_pg_subscription_rel_srsubid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(subid));
+
+ /* No index is used (InvalidOid/false), so this is a plain heap scan. */
+ scan = systable_beginscan(rel, InvalidOid, false,
+ NULL, nkeys, skey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_subscription_rel subrel;
+ SubscriptionRelState *relstate;
+ Datum lsndatum;
+ bool lsnisnull;
+
+ subrel = (Form_pg_subscription_rel) GETSTRUCT(tup);
+
+ relstate = (SubscriptionRelState *) palloc(sizeof(SubscriptionRelState));
+ relstate->relid = subrel->srrelid;
+ relstate->state = subrel->srsubstate;
+
+ /*
+ * srsublsn can be NULL (SetSubscriptionRelState() stores NULL for
+ * InvalidXLogRecPtr), in which case the field is not physically
+ * present in the tuple and must not be read through the struct.
+ */
+ lsndatum = heap_getattr(tup, Anum_pg_subscription_rel_srsublsn,
+ RelationGetDescr(rel), &lsnisnull);
+ if (lsnisnull)
+ relstate->lsn = InvalidXLogRecPtr;
+ else
+ relstate->lsn = DatumGetLSN(lsndatum);
+
+ res = lappend(res, relstate);
+ }
+
+ /* Cleanup */
+ systable_endscan(scan);
+ heap_close(rel, AccessShareLock);
+
+ return res;
+}
+
+/*
+ * Get all relations for subscription that are not in a ready state.
+ *
+ * Scans pg_subscription_rel for rows whose srsubid equals subid and whose
+ * srsubstate differs from SUBREL_STATE_READY, and returns a List of
+ * SubscriptionRelState structs (relid, state, lsn), one per matching row.
+ * The lsn field is InvalidXLogRecPtr for rows whose srsublsn column is
+ * NULL.
+ *
+ * Returned list is palloced in current memory context.
+ */
+List *
+GetSubscriptionNotReadyRelations(Oid subid)
+{
+ List *res = NIL;
+ Relation rel;
+ HeapTuple tup;
+ int nkeys = 0;
+ ScanKeyData skey[2];
+ SysScanDesc scan;
+
+ rel = heap_open(SubscriptionRelRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey[nkeys++],
+ Anum_pg_subscription_rel_srsubid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(subid));
+
+ /* The comparison applied to srsubstate is "not equal" (F_CHARNE). */
+ ScanKeyInit(&skey[nkeys++],
+ Anum_pg_subscription_rel_srsubstate,
+ BTEqualStrategyNumber, F_CHARNE,
+ CharGetDatum(SUBREL_STATE_READY));
+
+ /* No index is used (InvalidOid/false), so this is a plain heap scan. */
+ scan = systable_beginscan(rel, InvalidOid, false,
+ NULL, nkeys, skey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_subscription_rel subrel;
+ SubscriptionRelState *relstate;
+ Datum lsndatum;
+ bool lsnisnull;
+
+ subrel = (Form_pg_subscription_rel) GETSTRUCT(tup);
+
+ relstate = (SubscriptionRelState *) palloc(sizeof(SubscriptionRelState));
+ relstate->relid = subrel->srrelid;
+ relstate->state = subrel->srsubstate;
+
+ /*
+ * srsublsn can be NULL (SetSubscriptionRelState() stores NULL for
+ * InvalidXLogRecPtr), in which case the field is not physically
+ * present in the tuple and must not be read through the struct.
+ */
+ lsndatum = heap_getattr(tup, Anum_pg_subscription_rel_srsublsn,
+ RelationGetDescr(rel), &lsnisnull);
+ if (lsnisnull)
+ relstate->lsn = InvalidXLogRecPtr;
+ else
+ relstate->lsn = DatumGetLSN(lsndatum);
+
+ res = lappend(res, relstate);
+ }
+
+ /* Cleanup */
+ systable_endscan(scan);
+ heap_close(rel, AccessShareLock);
+
+ return res;
+}
su.oid AS subid,
su.subname,
st.pid,
+ st.relid,
st.received_lsn,
st.last_msg_send_time,
st.last_msg_receipt_time,
{
COPY_FILE, /* to/from file (or a piped program) */
COPY_OLD_FE, /* to/from frontend (2.0 protocol) */
- COPY_NEW_FE /* to/from frontend (3.0 protocol) */
+ COPY_NEW_FE, /* to/from frontend (3.0 protocol) */
+ COPY_CALLBACK /* to/from callback function */
} CopyDest;
/*
List *attnumlist; /* integer list of attnums to copy */
char *filename; /* filename, or NULL for STDIN/STDOUT */
bool is_program; /* is 'filename' a program to popen? */
+ copy_data_source_cb data_source_cb; /* function for reading data*/
bool binary; /* binary format? */
bool oids; /* include OIDs? */
bool freeze; /* freeze rows on loading? */
static uint64 CopyTo(CopyState cstate);
static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
Datum *values, bool *nulls);
-static uint64 CopyFrom(CopyState cstate);
static void CopyFromInsertBatch(CopyState cstate, EState *estate,
CommandId mycid, int hi_options,
ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
/* Dump the accumulated row as one CopyData message */
(void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len);
break;
+ case COPY_CALLBACK:
+ Assert(false); /* Not yet supported. */
+ break;
}
resetStringInfo(fe_msgbuf);
bytesread += avail;
}
break;
+ case COPY_CALLBACK:
+ bytesread = cstate->data_source_cb(databuf, minread, maxread);
+ break;
}
return bytesread;
PreventCommandIfParallelMode("COPY FROM");
cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program,
- stmt->attlist, stmt->options);
+ NULL, stmt->attlist, stmt->options);
cstate->range_table = range_table;
*processed = CopyFrom(cstate); /* copy from file to database */
EndCopyFrom(cstate);
/*
* Copy FROM file to relation.
*/
-static uint64
+uint64
CopyFrom(CopyState cstate)
{
HeapTuple tuple;
Relation rel,
const char *filename,
bool is_program,
+ copy_data_source_cb data_source_cb,
List *attnamelist,
List *options)
{
cstate->num_defaults = num_defaults;
cstate->is_program = is_program;
- if (pipe)
+ if (data_source_cb)
+ {
+ cstate->copy_dest = COPY_CALLBACK;
+ cstate->data_source_cb = data_source_cb;
+ }
+ else if (pipe)
{
Assert(!is_program); /* the grammar does not allow this */
if (whereToSendOutput == DestRemote)
#include "access/htup_details.h"
#include "access/xact.h"
+#include "catalog/dependency.h"
#include "catalog/indexing.h"
+#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/objectaddress.h"
#include "catalog/pg_type.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
#include "commands/defrem.h"
#include "commands/event_trigger.h"
#include "commands/subscriptioncmds.h"
+#include "nodes/makefuncs.h"
+
#include "replication/logicallauncher.h"
#include "replication/origin.h"
#include "replication/walreceiver.h"
+#include "replication/walsender.h"
#include "replication/worker_internal.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
+#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
+static List *fetch_table_list(WalReceiverConn *wrconn, List *publications);
+
/*
* Common option parsing function for CREATE and ALTER SUBSCRIPTION commands.
*
* accommodate that.
*/
static void
-parse_subscription_options(List *options, char **conninfo,
- List **publications, bool *enabled_given,
- bool *enabled, bool *create_slot, char **slot_name)
+parse_subscription_options(List *options, bool *connect, bool *enabled_given,
+ bool *enabled, bool *create_slot, char **slot_name,
+ bool *copy_data)
{
ListCell *lc;
+ bool connect_given = false;
bool create_slot_given = false;
+ bool copy_data_given = false;
- if (conninfo)
- *conninfo = NULL;
- if (publications)
- *publications = NIL;
+ if (connect)
+ *connect = true;
if (enabled)
{
*enabled_given = false;
*create_slot = true;
if (slot_name)
*slot_name = NULL;
+ if (copy_data)
+ *copy_data = true;
/* Parse options */
foreach (lc, options)
{
DefElem *defel = (DefElem *) lfirst(lc);
- if (strcmp(defel->defname, "conninfo") == 0 && conninfo)
- {
- if (*conninfo)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting or redundant options")));
-
- *conninfo = defGetString(defel);
- }
- else if (strcmp(defel->defname, "publication") == 0 && publications)
+ if (strcmp(defel->defname, "noconnect") == 0 && connect)
{
- if (*publications)
+ if (connect_given)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- *publications = defGetStringList(defel);
+ connect_given = true;
+ *connect = !defGetBoolean(defel);
}
else if (strcmp(defel->defname, "enabled") == 0 && enabled)
{
*slot_name = defGetString(defel);
}
+ else if (strcmp(defel->defname, "copy data") == 0 && copy_data)
+ {
+ if (copy_data_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ copy_data_given = true;
+ *copy_data = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "nocopy data") == 0 && copy_data)
+ {
+ if (copy_data_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ copy_data_given = true;
+ *copy_data = !defGetBoolean(defel);
+ }
else
elog(ERROR, "unrecognized option: %s", defel->defname);
}
+
+ /*
+ * We've been explicitly asked to not connect, that requires some
+ * additional processing.
+ */
+ if (connect && !*connect)
+ {
+ /* Check for incompatible options from the user. */
+ if (*enabled_given && *enabled)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("noconnect and enabled are mutually exclusive options")));
+
+ if (create_slot_given && *create_slot)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("noconnect and create slot are mutually exclusive options")));
+
+ if (copy_data_given && *copy_data)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("noconnect and copy data are mutually exclusive options")));
+
+ /* Change the defaults of other options. */
+ *enabled = false;
+ *create_slot = false;
+ *copy_data = false;
+ }
}
/*
Datum values[Natts_pg_subscription];
Oid owner = GetUserId();
HeapTuple tup;
+ bool connect;
bool enabled_given;
bool enabled;
+ bool copy_data;
char *conninfo;
char *slotname;
char originname[NAMEDATALEN];
* Parse and check options.
* Connection and publication should not be specified here.
*/
- parse_subscription_options(stmt->options, NULL, NULL,
- &enabled_given, &enabled,
- &create_slot, &slotname);
+ parse_subscription_options(stmt->options, &connect, &enabled_given,
+ &enabled, &create_slot, &slotname, &copy_data);
/*
* Since creating a replication slot is not transactional, rolling back
replorigin_create(originname);
/*
- * If requested, create the replication slot on remote side for our
- * newly created subscription.
+ * Connect to remote side to execute requested commands and fetch table
+ * info.
*/
- if (create_slot)
+ if (connect)
{
XLogRecPtr lsn;
char *err;
WalReceiverConn *wrconn;
+ List *tables;
+ ListCell *lc;
+ char table_state;
/* Try to connect to the publisher. */
wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
PG_TRY();
{
/*
- * Create permanent slot for the subscription. We won't use the
- * initial snapshot for anything, so no need to export it.
+ * If requested, create permanent slot for the subscription.
+ * We won't use the initial snapshot for anything, so no need
+ * to export it.
+ */
+ if (create_slot)
+ {
+ walrcv_create_slot(wrconn, slotname, false,
+ CRS_NOEXPORT_SNAPSHOT, &lsn);
+ ereport(NOTICE,
+ (errmsg("created replication slot \"%s\" on publisher",
+ slotname)));
+ }
+
+ /*
+ * Set sync state based on if we were asked to do data copy or
+ * not.
*/
- walrcv_create_slot(wrconn, slotname, false, false, &lsn);
+ table_state = copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY;
+
+ /*
+ * Get the table list from publisher and build local table status
+ * info.
+ */
+ tables = fetch_table_list(wrconn, publications);
+			foreach (lc, tables)
+			{
+				RangeVar   *rv = (RangeVar *) lfirst(lc);
+				Oid			relid;
+
+				/*
+				 * Fail if a published table does not exist locally, the same
+				 * way the REFRESH code path does.  With missing_ok the lookup
+				 * would hand back InvalidOid, which would then be recorded as
+				 * a relation state for a nonexistent table.
+				 */
+				relid = RangeVarGetRelid(rv, AccessShareLock, false);
+
+				SetSubscriptionRelState(subid, relid, table_state,
+										InvalidXLogRecPtr);
+			}
+
ereport(NOTICE,
- (errmsg("created replication slot \"%s\" on publisher",
- slotname)));
+ (errmsg("synchronized table states")));
}
PG_CATCH();
{
/* And we are done with the remote side. */
walrcv_disconnect(wrconn);
}
+ else
+ ereport(WARNING,
+ (errmsg("tables were not subscribed, you will have to run "
+ "ALTER SUBSCRIPTION ... REFRESH PUBLICATION to "
+ "subscribe the tables")));
heap_close(rel, RowExclusiveLock);
return myself;
}
+/*
+ * Bring the subscription's set of tracked tables in line with the publisher.
+ *
+ * Connects to the publisher using sub->conninfo and fetches the tables that
+ * belong to sub->publications.  Every published table that has no local
+ * state yet gets one (SUBREL_STATE_INIT if copy_data is true, otherwise
+ * SUBREL_STATE_READY); every locally tracked table that is no longer
+ * published has its state removed.  A NOTICE is emitted per table added or
+ * removed.
+ *
+ * Raises ERROR if the publisher cannot be reached, if the table list cannot
+ * be fetched, or if a published table does not exist locally.
+ */
+static void
+AlterSubscription_refresh(Subscription *sub, bool copy_data)
+{
+	char	   *err;
+	List	   *pubrel_names = NIL;
+	List	   *subrel_states;
+	Oid		   *subrel_local_oids;
+	Oid		   *pubrel_local_oids;
+	ListCell   *lc;
+	int			off;
+	int			nsubrels;
+	int			npubrels;
+
+	/* Load the library providing us libpq calls. */
+	load_file("libpqwalreceiver", false);
+
+	/* Try to connect to the publisher. */
+	wrconn = walrcv_connect(sub->conninfo, true, sub->name, &err);
+	if (!wrconn)
+		ereport(ERROR,
+				(errmsg("could not connect to the publisher: %s", err)));
+
+	/*
+	 * Get the table list from publisher.  The remote query can raise an
+	 * ERROR, which would jump past walrcv_disconnect() below, so make sure
+	 * the connection is closed before the error is propagated.
+	 */
+	PG_TRY();
+	{
+		pubrel_names = fetch_table_list(wrconn, sub->publications);
+	}
+	PG_CATCH();
+	{
+		walrcv_disconnect(wrconn);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	/* We are done with the remote side, close connection. */
+	walrcv_disconnect(wrconn);
+
+	/* Get local table list. */
+	subrel_states = GetSubscriptionRelations(sub->oid);
+	nsubrels = list_length(subrel_states);
+	npubrels = list_length(pubrel_names);
+
+	/*
+	 * Build qsorted array of local table oids for faster lookup.
+	 * This can potentially contain all tables in the database so
+	 * speed of lookup is important.
+	 */
+	subrel_local_oids = palloc(nsubrels * sizeof(Oid));
+	off = 0;
+	foreach(lc, subrel_states)
+	{
+		SubscriptionRelState *relstate = (SubscriptionRelState *) lfirst(lc);
+
+		subrel_local_oids[off++] = relstate->relid;
+	}
+	qsort(subrel_local_oids, nsubrels, sizeof(Oid), oid_cmp);
+
+	/*
+	 * Walk over the remote tables and try to match them to locally
+	 * known tables.  If the table is not known locally, create a new state
+	 * for it.
+	 *
+	 * Also builds array of local oids of remote tables for the next step.
+	 */
+	pubrel_local_oids = palloc(npubrels * sizeof(Oid));
+	off = 0;
+
+	foreach (lc, pubrel_names)
+	{
+		RangeVar   *rv = (RangeVar *) lfirst(lc);
+		Oid			relid;
+
+		/* missing_ok is false: a published table must exist locally. */
+		relid = RangeVarGetRelid(rv, AccessShareLock, false);
+		pubrel_local_oids[off++] = relid;
+
+		if (!bsearch(&relid, subrel_local_oids,
+					 nsubrels, sizeof(Oid), oid_cmp))
+		{
+			SetSubscriptionRelState(sub->oid, relid,
+									copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY,
+									InvalidXLogRecPtr);
+			ereport(NOTICE,
+					(errmsg("added subscription for table %s.%s",
+							quote_identifier(rv->schemaname),
+							quote_identifier(rv->relname))));
+		}
+	}
+
+	/*
+	 * Next remove state for tables we should not care about anymore using
+	 * the data we collected above.
+	 *
+	 * NOTE(review): a table synchronization worker may still be running for
+	 * a relation removed here; confirm whether it has to be stopped as well.
+	 */
+	qsort(pubrel_local_oids, npubrels, sizeof(Oid), oid_cmp);
+
+	for (off = 0; off < nsubrels; off++)
+	{
+		Oid			relid = subrel_local_oids[off];
+
+		if (!bsearch(&relid, pubrel_local_oids,
+					 npubrels, sizeof(Oid), oid_cmp))
+		{
+			char	   *namespace;
+
+			RemoveSubscriptionRel(sub->oid, relid);
+
+			namespace = get_namespace_name(get_rel_namespace(relid));
+			ereport(NOTICE,
+					(errmsg("removed subscription for table %s.%s",
+							quote_identifier(namespace),
+							quote_identifier(get_rel_name(relid)))));
+		}
+	}
+}
+
/*
* Alter the existing subscription.
*/
Datum values[Natts_pg_subscription];
HeapTuple tup;
Oid subid;
- bool enabled_given;
- bool enabled;
- char *conninfo;
- char *slot_name;
- List *publications;
+ bool update_tuple = false;
rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
subid = HeapTupleGetOid(tup);
- /* Parse options. */
- parse_subscription_options(stmt->options, &conninfo, &publications,
- &enabled_given, &enabled,
- NULL, &slot_name);
-
/* Form a new tuple. */
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
memset(replaces, false, sizeof(replaces));
- if (enabled_given)
- {
- values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled);
- replaces[Anum_pg_subscription_subenabled - 1] = true;
- }
- if (conninfo)
- {
- values[Anum_pg_subscription_subconninfo - 1] =
- CStringGetTextDatum(conninfo);
- replaces[Anum_pg_subscription_subconninfo - 1] = true;
- }
- if (slot_name)
- {
- values[Anum_pg_subscription_subslotname - 1] =
- DirectFunctionCall1(namein, CStringGetDatum(slot_name));
- replaces[Anum_pg_subscription_subslotname - 1] = true;
- }
- if (publications != NIL)
+ switch (stmt->kind)
{
- values[Anum_pg_subscription_subpublications - 1] =
- publicationListToArray(publications);
- replaces[Anum_pg_subscription_subpublications - 1] = true;
+ case ALTER_SUBSCRIPTION_OPTIONS:
+ {
+ char *slot_name;
+
+ parse_subscription_options(stmt->options, NULL, NULL, NULL,
+ NULL, &slot_name, NULL);
+
+ values[Anum_pg_subscription_subslotname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(slot_name));
+ replaces[Anum_pg_subscription_subslotname - 1] = true;
+
+ update_tuple = true;
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_ENABLED:
+ {
+ bool enabled,
+ enabled_given;
+
+ parse_subscription_options(stmt->options, NULL,
+ &enabled_given, &enabled, NULL,
+ NULL, NULL);
+ Assert(enabled_given);
+
+ values[Anum_pg_subscription_subenabled - 1] =
+ BoolGetDatum(enabled);
+ replaces[Anum_pg_subscription_subenabled - 1] = true;
+
+ update_tuple = true;
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_CONNECTION:
+ values[Anum_pg_subscription_subconninfo - 1] =
+ CStringGetTextDatum(stmt->conninfo);
+ replaces[Anum_pg_subscription_subconninfo - 1] = true;
+ update_tuple = true;
+ break;
+
+ case ALTER_SUBSCRIPTION_PUBLICATION:
+ case ALTER_SUBSCRIPTION_PUBLICATION_REFRESH:
+ {
+ bool copy_data;
+ Subscription *sub = GetSubscription(subid, false);
+
+ parse_subscription_options(stmt->options, NULL, NULL, NULL,
+ NULL, NULL, &copy_data);
+
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(stmt->publication);
+ replaces[Anum_pg_subscription_subpublications - 1] = true;
+
+ update_tuple = true;
+
+ /* Refresh if user asked us to. */
+ if (stmt->kind == ALTER_SUBSCRIPTION_PUBLICATION_REFRESH)
+ {
+ /* Make sure refresh sees the new list of publications. */
+ sub->publications = stmt->publication;
+
+ AlterSubscription_refresh(sub, copy_data);
+ }
+
+ break;
+ }
+
+ case ALTER_SUBSCRIPTION_REFRESH:
+ {
+ bool copy_data;
+ Subscription *sub = GetSubscription(subid, false);
+
+ parse_subscription_options(stmt->options, NULL, NULL, NULL,
+ NULL, NULL, &copy_data);
+
+ AlterSubscription_refresh(sub, copy_data);
+
+ break;
+ }
+
+ default:
+ elog(ERROR, "unrecognized ALTER SUBSCRIPTION kind %d",
+ stmt->kind);
}
- tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
- replaces);
+ /* Update the catalog if needed. */
+ if (update_tuple)
+ {
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
- /* Update the catalog. */
- CatalogTupleUpdate(rel, &tup->t_self, tup);
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
- ObjectAddressSet(myself, SubscriptionRelationId, subid);
+ heap_freetuple(tup);
+ }
- /* Cleanup. */
- heap_freetuple(tup);
heap_close(rel, RowExclusiveLock);
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0);
return myself;
/* Clean up dependencies */
deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid, 0);
+ /* Remove any associated relation synchronization states. */
+ RemoveSubscriptionRel(subid, InvalidOid);
+
/* Kill the apply worker so that the slot becomes accessible. */
- logicalrep_worker_stop(subid);
+ logicalrep_worker_stop(subid, InvalidOid);
/* Remove the origin tracking if exists. */
snprintf(originname, sizeof(originname), "pg_%u", subid);
PG_TRY();
{
- if (!walrcv_command(wrconn, cmd.data, &err))
+ WalRcvExecResult *res;
+ res = walrcv_exec(wrconn, cmd.data, 0, NULL);
+
+ if (res->status != WALRCV_OK_COMMAND)
ereport(ERROR,
(errmsg("could not drop the replication slot \"%s\" on publisher",
slotname),
- errdetail("The error was: %s", err)));
+ errdetail("The error was: %s", res->err)));
else
ereport(NOTICE,
(errmsg("dropped replication slot \"%s\" on publisher",
slotname)));
+
+ walrcv_clear_result(res);
}
PG_CATCH();
{
heap_close(rel, RowExclusiveLock);
}
+
+/*
+ * Get the list of tables which belong to specified publications on the
+ * publisher connection.
+ *
+ * wrconn is an established connection to the publisher; publications is a
+ * non-empty list of publication names (string Value nodes).  Returns a list
+ * of schema-qualified RangeVars, one per distinct published table, or NIL if
+ * the publications contain no tables.  Raises ERROR if the remote query does
+ * not return a tuple result.
+ */
+static List *
+fetch_table_list(WalReceiverConn *wrconn, List *publications)
+{
+	WalRcvExecResult *res;
+	StringInfoData cmd;
+	TupleTableSlot *slot;
+	Oid			tableRow[2] = {TEXTOID, TEXTOID};
+	ListCell   *lc;
+	bool		first;
+	List	   *tablelist = NIL;
+
+	Assert(list_length(publications) > 0);
+
+	/* Publication names are sent as quoted literals in an IN (...) list. */
+	initStringInfo(&cmd);
+	appendStringInfo(&cmd, "SELECT DISTINCT t.schemaname, t.tablename\n"
+						   " FROM pg_catalog.pg_publication_tables t\n"
+						   " WHERE t.pubname IN (");
+	first = true;
+	foreach (lc, publications)
+	{
+		char	   *pubname = strVal(lfirst(lc));
+
+		if (first)
+			first = false;
+		else
+			appendStringInfoString(&cmd, ", ");
+
+		appendStringInfoString(&cmd, quote_literal_cstr(pubname));
+	}
+	appendStringInfoString(&cmd, ")");
+
+	res = walrcv_exec(wrconn, cmd.data, 2, tableRow);
+	pfree(cmd.data);
+
+	if (res->status != WALRCV_OK_TUPLES)
+		ereport(ERROR,
+				(errmsg("could not receive list of replicated tables from the publisher: %s",
+						res->err)));
+
+	/*
+	 * Process tables.  The result may carry no tuplestore (and no tuple
+	 * descriptor) when the query matched no rows, so only build a slot and
+	 * read rows when there is something to read.
+	 */
+	if (res->tuplestore != NULL)
+	{
+		slot = MakeSingleTupleTableSlot(res->tupledesc);
+		while (tuplestore_gettupleslot(res->tuplestore, true, false, slot))
+		{
+			char	   *nspname;
+			char	   *relname;
+			bool		isnull;
+			RangeVar   *rv;
+
+			nspname = TextDatumGetCString(slot_getattr(slot, 1, &isnull));
+			Assert(!isnull);
+			relname = TextDatumGetCString(slot_getattr(slot, 2, &isnull));
+			Assert(!isnull);
+
+			/* The C strings are already private copies; no pstrdup needed. */
+			rv = makeRangeVar(nspname, relname, -1);
+			tablelist = lappend(tablelist, rv);
+
+			ExecClearTuple(slot);
+		}
+		ExecDropSingleTupleTableSlot(slot);
+	}
+
+	walrcv_clear_result(res);
+
+	return tablelist;
+}
MAPPING MATCH MATERIALIZED MAXVALUE METHOD MINUTE_P MINVALUE MODE MONTH_P MOVE
NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NONE
- NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF
+ NOREFRESH NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF
NULLS_P NUMERIC
OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OPTIONS OR
{
AlterSubscriptionStmt *n =
makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_OPTIONS;
n->subname = $3;
n->options = $5;
$$ = (Node *)n;
{
AlterSubscriptionStmt *n =
makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_CONNECTION;
n->subname = $3;
- n->options = list_make1(makeDefElem("conninfo",
- (Node *)makeString($5), @1));
+ n->conninfo = $5;
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name REFRESH PUBLICATION opt_definition
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_REFRESH;
+ n->subname = $3;
+ n->options = $6;
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list REFRESH opt_definition
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_PUBLICATION_REFRESH;
+ n->subname = $3;
+ n->publication = $6;
+ n->options = $8;
$$ = (Node *)n;
}
- | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list
+ | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list NOREFRESH
{
AlterSubscriptionStmt *n =
makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_PUBLICATION;
n->subname = $3;
- n->options = list_make1(makeDefElem("publication",
- (Node *)$6, @1));
+ n->publication = $6;
+ n->options = NIL;
$$ = (Node *)n;
}
| ALTER SUBSCRIPTION name ENABLE_P
{
AlterSubscriptionStmt *n =
makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_ENABLED;
n->subname = $3;
n->options = list_make1(makeDefElem("enabled",
(Node *)makeInteger(TRUE), @1));
{
AlterSubscriptionStmt *n =
makeNode(AlterSubscriptionStmt);
+ n->kind = ALTER_SUBSCRIPTION_ENABLED;
n->subname = $3;
n->options = list_make1(makeDefElem("enabled",
(Node *)makeInteger(FALSE), @1));
$$ = (Node *)n;
- } ;
+ }
+ ;
/*****************************************************************************
*
| NEW
| NEXT
| NO
+ | NOREFRESH
| NOTHING
| NOTIFY
| NOWAIT
case WAIT_EVENT_SYNC_REP:
event_name = "SyncRep";
break;
+ case WAIT_EVENT_LOGICAL_SYNC_DATA:
+ event_name = "LogicalSyncData";
+ break;
+ case WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE:
+ event_name = "LogicalSyncStateChange";
+ break;
/* no default case, so that compiler will warn */
}
#include "libpq-fe.h"
#include "pqexpbuffer.h"
#include "access/xlog.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
-#include "replication/logicalproto.h"
#include "replication/walreceiver.h"
-#include "storage/proc.h"
#include "utils/builtins.h"
+#include "utils/memutils.h"
#include "utils/pg_lsn.h"
+#include "utils/tuplestore.h"
PG_MODULE_MAGIC;
static char *libpqrcv_create_slot(WalReceiverConn *conn,
const char *slotname,
bool temporary,
- bool export_snapshot,
+ CRSSnapshotAction snapshot_action,
XLogRecPtr *lsn);
-static bool libpqrcv_command(WalReceiverConn *conn,
- const char *cmd, char **err);
+static WalRcvExecResult *libpqrcv_exec(WalReceiverConn *conn,
+ const char *query,
+ const int nRetTypes,
+ const Oid *retTypes);
static void libpqrcv_disconnect(WalReceiverConn *conn);
static WalReceiverFunctionsType PQWalReceiverFunctions = {
libpqrcv_receive,
libpqrcv_send,
libpqrcv_create_slot,
- libpqrcv_command,
+ libpqrcv_exec,
libpqrcv_disconnect
};
* next timeline's ID, or just CommandComplete if the server was shut
* down.
*
- * If we had not yet received CopyDone from the backend, PGRES_COPY_IN
- * would also be possible. However, at the moment this function is only
- * called after receiving CopyDone from the backend - the walreceiver
- * never terminates replication on its own initiative.
+ * If we had not yet received CopyDone from the backend, PGRES_COPY_OUT
+ * is also possible in case we aborted the copy in mid-stream.
*/
res = PQgetResult(conn->streamConn);
if (PQresultStatus(res) == PGRES_TUPLES_OK)
* Windows.
*
* The function is modeled on PQexec() in libpq, but only implements
- * those parts that are in use in the walreceiver.
+ * those parts that are in use in the walreceiver api.
*
* Queries are always executed on the connection in streamConn.
*/
/*
* PQexec() silently discards any prior query results on the connection.
- * This is not required for walreceiver since it's expected that walsender
- * won't generate any such junk results.
+ * This is not required for this function as it's expected that the
+ * caller (which is this library in all cases) will behave correctly and
+ * we don't have to be backwards compatible with old libpq.
*/
/*
/*
* Emulate the PQexec()'s behavior of returning the last result when
- * there are many. Since walsender will never generate multiple
- * results, we skip the concatenation of error messages.
+ * there are many. We are fine with returning just last error message.
*/
result = PQgetResult(streamConn);
if (result == NULL)
PGresult *res;
res = PQgetResult(conn->streamConn);
- if (PQresultStatus(res) == PGRES_COMMAND_OK ||
- PQresultStatus(res) == PGRES_COPY_IN)
+ if (PQresultStatus(res) == PGRES_COMMAND_OK)
+ {
+ PQclear(res);
+
+ /* Verify that there are no more results */
+ res = PQgetResult(conn->streamConn);
+ if (res != NULL)
+ ereport(ERROR,
+ (errmsg("unexpected result after CommandComplete: %s",
+ PQerrorMessage(conn->streamConn))));
+ return -1;
+ }
+ else if (PQresultStatus(res) == PGRES_COPY_IN)
{
PQclear(res);
return -1;
*/
static char *
libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname,
- bool temporary, bool export_snapshot, XLogRecPtr *lsn)
+ bool temporary, CRSSnapshotAction snapshot_action,
+ XLogRecPtr *lsn)
{
PGresult *res;
StringInfoData cmd;
if (conn->logical)
{
appendStringInfo(&cmd, " LOGICAL pgoutput");
- if (export_snapshot)
- appendStringInfo(&cmd, " EXPORT_SNAPSHOT");
- else
- appendStringInfo(&cmd, " NOEXPORT_SNAPSHOT");
+ switch (snapshot_action)
+ {
+ case CRS_EXPORT_SNAPSHOT:
+ appendStringInfo(&cmd, " EXPORT_SNAPSHOT");
+ break;
+ case CRS_NOEXPORT_SNAPSHOT:
+ appendStringInfo(&cmd, " NOEXPORT_SNAPSHOT");
+ break;
+ case CRS_USE_SNAPSHOT:
+ appendStringInfo(&cmd, " USE_SNAPSHOT");
+ break;
+ }
}
res = libpqrcv_PQexec(conn->streamConn, cmd.data);
}
/*
- * Run command.
+ * Convert tuple query result to tuplestore.
+ */
+static void
+libpqrcv_processTuples(PGresult *pgres, WalRcvExecResult *walres,
+					   const int nRetTypes, const Oid *retTypes)
+{
+	/*
+	 * pgres is the libpq result to convert; walres receives the tuplestore
+	 * and the matching tuple descriptor.  retTypes holds the nRetTypes type
+	 * OIDs the caller expects, in column order.  Raises ERROR if the number
+	 * of returned columns differs from nRetTypes.
+	 */
+	int			tupn;
+	int			coln;
+	int			nfields = PQnfields(pgres);
+	int			ntuples = PQntuples(pgres);
+	HeapTuple	tuple;
+	AttInMetadata *attinmeta;
+	MemoryContext rowcontext;
+	MemoryContext oldcontext;
+
+	/* Make sure we got expected number of fields. */
+	if (nfields != nRetTypes)
+		ereport(ERROR,
+				(errmsg("invalid query response"),
+				 errdetail("Expected %d fields, got %d fields.",
+						   nRetTypes, nfields)));
+
+	/*
+	 * Always hand back a tuplestore and descriptor, even for an empty
+	 * result, so that callers can iterate over the result unconditionally.
+	 */
+	walres->tuplestore = tuplestore_begin_heap(true, false, work_mem);
+
+	/* Create tuple descriptor corresponding to expected result. */
+	walres->tupledesc = CreateTemplateTupleDesc(nRetTypes, false);
+	for (coln = 0; coln < nRetTypes; coln++)
+		TupleDescInitEntry(walres->tupledesc, (AttrNumber) coln + 1,
+						   PQfname(pgres, coln), retTypes[coln], -1, 0);
+	attinmeta = TupleDescGetAttInMetadata(walres->tupledesc);
+
+	/* No point in doing more here if there were no tuples returned. */
+	if (ntuples == 0)
+		return;
+
+	/* Create temporary context for local allocations. */
+	rowcontext = AllocSetContextCreate(CurrentMemoryContext,
+									   "libpqrcv query result context",
+									   ALLOCSET_DEFAULT_SIZES);
+
+	/* Process returned rows. */
+	for (tupn = 0; tupn < ntuples; tupn++)
+	{
+		char	   *cstrs[MaxTupleAttributeNumber];
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Do the allocations in temporary context. */
+		oldcontext = MemoryContextSwitchTo(rowcontext);
+
+		/*
+		 * Fill cstrs with null-terminated strings of column values.
+		 */
+		for (coln = 0; coln < nfields; coln++)
+		{
+			if (PQgetisnull(pgres, tupn, coln))
+				cstrs[coln] = NULL;
+			else
+				cstrs[coln] = PQgetvalue(pgres, tupn, coln);
+		}
+
+		/* Convert row to a tuple, and add it to the tuplestore */
+		tuple = BuildTupleFromCStrings(attinmeta, cstrs);
+		tuplestore_puttuple(walres->tuplestore, tuple);
+
+		/* Clean up */
+		MemoryContextSwitchTo(oldcontext);
+		MemoryContextReset(rowcontext);
+	}
+
+	MemoryContextDelete(rowcontext);
+}
+
+/*
+ * Public interface for sending generic queries (and commands).
*
- * Returns if the command has succeeded and fills the err with palloced
- * error message if not.
+ * This can only be called from process connected to database.
*/
-static bool
-libpqrcv_command(WalReceiverConn *conn, const char *cmd, char **err)
+static WalRcvExecResult *
+libpqrcv_exec(WalReceiverConn *conn, const char *query,
+ const int nRetTypes, const Oid *retTypes)
{
- PGresult *res;
+ PGresult *pgres = NULL;
+ WalRcvExecResult *walres = palloc0(sizeof(WalRcvExecResult));
- res = libpqrcv_PQexec(conn->streamConn, cmd);
+ if (MyDatabaseId == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("the query interface requires a database connection")));
- if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ pgres = libpqrcv_PQexec(conn->streamConn, query);
+
+ switch (PQresultStatus(pgres))
{
- PQclear(res);
- *err = pchomp(PQerrorMessage(conn->streamConn));
- return false;
+ case PGRES_SINGLE_TUPLE:
+ case PGRES_TUPLES_OK:
+ walres->status = WALRCV_OK_TUPLES;
+ libpqrcv_processTuples(pgres, walres, nRetTypes, retTypes);
+ break;
+
+ case PGRES_COPY_IN:
+ walres->status = WALRCV_OK_COPY_IN;
+ break;
+
+ case PGRES_COPY_OUT:
+ walres->status = WALRCV_OK_COPY_OUT;
+ break;
+
+ case PGRES_COPY_BOTH:
+ walres->status = WALRCV_OK_COPY_BOTH;
+ break;
+
+ case PGRES_COMMAND_OK:
+ walres->status = WALRCV_OK_COMMAND;
+ break;
+
+ /* Empty query is considered error. */
+ case PGRES_EMPTY_QUERY:
+ walres->status = WALRCV_ERROR;
+ walres->err = _("empty query");
+ break;
+
+ case PGRES_NONFATAL_ERROR:
+ case PGRES_FATAL_ERROR:
+ case PGRES_BAD_RESPONSE:
+ walres->status = WALRCV_ERROR;
+ walres->err = pchomp(PQerrorMessage(conn->streamConn));
+ break;
}
- PQclear(res);
+ PQclear(pgres);
- return true;
+ return walres;
}
/*
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \
- proto.o relation.o reorderbuffer.o snapbuild.o worker.o
+ proto.o relation.o reorderbuffer.o snapbuild.o tablesync.o worker.o
include $(top_srcdir)/src/backend/common.mk
#include "access/xact.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
#include "libpq/pqsignal.h"
#define DEFAULT_NAPTIME_PER_CYCLE 180000L
int max_logical_replication_workers = 4;
+int max_sync_workers_per_subscription = 2;
+
LogicalRepWorker *MyLogicalRepWorker = NULL;
typedef struct LogicalRepCtxStruct
/*
* Walks the workers array and searches for one that matches given
- * subscription id.
+ * subscription id and relid.
*/
LogicalRepWorker *
-logicalrep_worker_find(Oid subid)
+logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
{
int i;
LogicalRepWorker *res = NULL;
Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+
/* Search for attached worker for a given subscription id. */
for (i = 0; i < max_logical_replication_workers; i++)
{
LogicalRepWorker *w = &LogicalRepCtx->workers[i];
- if (w->subid == subid && w->proc && IsBackendPid(w->proc->pid))
+ if (w->subid == subid && w->relid == relid &&
+ (!only_running || (w->proc && IsBackendPid(w->proc->pid))))
{
res = w;
break;
* Start new apply background worker.
*/
void
-logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
+logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid,
+ Oid relid)
{
BackgroundWorker bgw;
BackgroundWorkerHandle *bgw_handle;
}
/* Prepare the worker info. */
- memset(worker, 0, sizeof(LogicalRepWorker));
+ worker->proc = NULL;
worker->dbid = dbid;
worker->userid = userid;
worker->subid = subid;
+ worker->relid = relid;
+ worker->relstate = SUBREL_STATE_UNKNOWN;
+ worker->relstate_lsn = InvalidXLogRecPtr;
+ worker->last_lsn = InvalidXLogRecPtr;
+ TIMESTAMP_NOBEGIN(worker->last_send_time);
+ TIMESTAMP_NOBEGIN(worker->last_recv_time);
+ worker->reply_lsn = InvalidXLogRecPtr;
+ TIMESTAMP_NOBEGIN(worker->reply_time);
LWLockRelease(LogicalRepWorkerLock);
BGWORKER_BACKEND_DATABASE_CONNECTION;
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
bgw.bgw_main = ApplyWorkerMain;
- snprintf(bgw.bgw_name, BGW_MAXLEN,
- "logical replication worker for subscription %u", subid);
+ if (OidIsValid(relid))
+ snprintf(bgw.bgw_name, BGW_MAXLEN,
+ "logical replication worker for subscription %u sync %u", subid, relid);
+ else
+ snprintf(bgw.bgw_name, BGW_MAXLEN,
+ "logical replication worker for subscription %u", subid);
bgw.bgw_restart_time = BGW_NEVER_RESTART;
bgw.bgw_notify_pid = MyProcPid;
* slot.
*/
void
-logicalrep_worker_stop(Oid subid)
+logicalrep_worker_stop(Oid subid, Oid relid)
{
LogicalRepWorker *worker;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
- worker = logicalrep_worker_find(subid);
+ worker = logicalrep_worker_find(subid, relid, false);
/* No worker, nothing to do. */
if (!worker)
}
}
+/*
+ * Wake up (using latch) the logical replication worker.
+ *
+ * Looks up the running worker registered for the given subscription and
+ * relation (InvalidOid selects the worker with no relation assigned) and
+ * sets its latch.  Does nothing if no such running worker exists.
+ */
+void
+logicalrep_worker_wakeup(Oid subid, Oid relid)
+{
+	LogicalRepWorker *worker;
+
+	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+	worker = logicalrep_worker_find(subid, relid, true);
+
+	/*
+	 * Set the latch while still holding the lock: the worker slot is shared
+	 * state, and once the lock is released worker->proc may be cleared or
+	 * the slot handed to another worker.
+	 */
+	if (worker)
+		logicalrep_worker_wakeup_ptr(worker);
+
+	LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Wake up (using latch) the logical replication worker given by pointer.
+ *
+ * worker->proc is dereferenced unconditionally, so the worker slot must
+ * have a process attached.
+ */
+void
+logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
+{
+	Latch	   *worker_latch = &worker->proc->procLatch;
+
+	SetLatch(worker_latch);
+}
+
/*
* Attach to a slot.
*/
SetLatch(MyLatch);
}
+/*
+ * Return how many sync worker slots are registered for the subscription.
+ *
+ * Slots are counted whether or not their worker process is running.  The
+ * caller must hold LogicalRepWorkerLock.
+ */
+int
+logicalrep_sync_worker_count(Oid subid)
+{
+	int			nsync = 0;
+	int			slotno = 0;
+
+	Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+
+	/* Count slots of this subscription that carry a valid relation id. */
+	while (slotno < max_logical_replication_workers)
+	{
+		LogicalRepWorker *w = &LogicalRepCtx->workers[slotno++];
+
+		if (w->subid != subid || !OidIsValid(w->relid))
+			continue;
+
+		nsync++;
+	}
+
+	return nsync;
+}
+
/*
* ApplyLauncherShmemSize
* Compute space needed for replication launcher shared memory
&found);
if (!found)
+ {
+ int slot;
+
memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
+
+ /* Initialize memory and spin locks for each worker slot. */
+ for (slot = 0; slot < max_logical_replication_workers; slot++)
+ {
+ LogicalRepWorker *worker = &LogicalRepCtx->workers[slot];
+
+ memset(worker, 0, sizeof(LogicalRepWorker));
+ SpinLockInit(&worker->relmutex);
+ }
+ }
}
/*
LogicalRepWorker *w;
LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
- w = logicalrep_worker_find(sub->oid);
+ w = logicalrep_worker_find(sub->oid, InvalidOid, false);
LWLockRelease(LogicalRepWorkerLock);
if (sub->enabled && w == NULL)
{
- logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);
+ logicalrep_worker_launch(sub->dbid, sub->oid, sub->name,
+ sub->owner, InvalidOid);
last_start_time = now;
wait_time = wal_retrieve_retry_interval;
/* Limit to one worker per mainloop cycle. */
Datum
pg_stat_get_subscription(PG_FUNCTION_ARGS)
{
-#define PG_STAT_GET_SUBSCRIPTION_COLS 7
+#define PG_STAT_GET_SUBSCRIPTION_COLS 8
Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
int i;
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
MemSet(nulls, 0, sizeof(nulls));
values[0] = ObjectIdGetDatum(worker.subid);
- values[1] = Int32GetDatum(worker_pid);
+ if (OidIsValid(worker.relid))
+ values[1] = ObjectIdGetDatum(worker.relid);
+ else
+ nulls[1] = true;
+ values[2] = Int32GetDatum(worker_pid);
if (XLogRecPtrIsInvalid(worker.last_lsn))
- nulls[2] = true;
+ nulls[3] = true;
else
- values[2] = LSNGetDatum(worker.last_lsn);
+ values[3] = LSNGetDatum(worker.last_lsn);
if (worker.last_send_time == 0)
- nulls[3] = true;
+ nulls[4] = true;
else
- values[3] = TimestampTzGetDatum(worker.last_send_time);
+ values[4] = TimestampTzGetDatum(worker.last_send_time);
if (worker.last_recv_time == 0)
- nulls[4] = true;
+ nulls[5] = true;
else
- values[4] = TimestampTzGetDatum(worker.last_recv_time);
+ values[5] = TimestampTzGetDatum(worker.last_recv_time);
if (XLogRecPtrIsInvalid(worker.reply_lsn))
- nulls[5] = true;
+ nulls[6] = true;
else
- values[5] = LSNGetDatum(worker.reply_lsn);
+ values[6] = LSNGetDatum(worker.reply_lsn);
if (worker.reply_time == 0)
- nulls[6] = true;
+ nulls[7] = true;
else
- values[6] = TimestampTzGetDatum(worker.reply_time);
+ values[7] = TimestampTzGetDatum(worker.reply_time);
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
#include "access/heapam.h"
#include "access/sysattr.h"
#include "catalog/namespace.h"
+#include "catalog/pg_subscription_rel.h"
#include "nodes/makefuncs.h"
#include "replication/logicalrelation.h"
#include "replication/worker_internal.h"
else
entry->localrel = heap_open(entry->localreloid, lockmode);
+ if (entry->state != SUBREL_STATE_READY)
+ entry->state = GetSubscriptionRelState(MySubscription->oid,
+ entry->localreloid,
+ &entry->statelsn,
+ true);
+
return entry;
}
}
/*
- * Export a snapshot so it can be set in another session with SET TRANSACTION
- * SNAPSHOT.
- *
- * For that we need to start a transaction in the current backend as the
- * importing side checks whether the source transaction is still open to make
- * sure the xmin horizon hasn't advanced since then.
+ * Build the initial slot snapshot and convert it to normal snapshot that
+ * is understood by HeapTupleSatisfiesMVCC.
*
- * After that we convert a locally built snapshot into the normal variant
- * understood by HeapTupleSatisfiesMVCC et al.
+ * The snapshot will be usable directly in current transaction or exported
+ * for loading in different transaction.
*/
-const char *
-SnapBuildExportSnapshot(SnapBuild *builder)
+Snapshot
+SnapBuildInitalSnapshot(SnapBuild *builder)
{
Snapshot snap;
- char *snapname;
TransactionId xid;
TransactionId *newxip;
int newxcnt = 0;
+	/*
+	 * The surrounding transaction must not have taken a snapshot yet and
+	 * must run at REPEATABLE READ.  Use a comparison (==) here: an
+	 * assignment would silently overwrite XactIsoLevel in assert-enabled
+	 * builds and check nothing.
+	 */
+	Assert(!FirstSnapshotSet);
+	Assert(XactIsoLevel == XACT_REPEATABLE_READ);
+
if (builder->state != SNAPBUILD_CONSISTENT)
- elog(ERROR, "cannot export a snapshot before reaching a consistent state");
+ elog(ERROR, "cannot build an initial slot snapshot before reaching a consistent state");
if (!builder->committed.includes_all_transactions)
- elog(ERROR, "cannot export a snapshot, not all transactions are monitored anymore");
+ elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore");
/* so we don't overwrite the existing value */
if (TransactionIdIsValid(MyPgXact->xmin))
- elog(ERROR, "cannot export a snapshot when MyPgXact->xmin already is valid");
-
- if (IsTransactionOrTransactionBlock())
- elog(ERROR, "cannot export a snapshot from within a transaction");
-
- if (SavedResourceOwnerDuringExport)
- elog(ERROR, "can only export one snapshot at a time");
-
- SavedResourceOwnerDuringExport = CurrentResourceOwner;
- ExportInProgress = true;
-
- StartTransactionCommand();
-
- Assert(!FirstSnapshotSet);
-
- /* There doesn't seem to a nice API to set these */
- XactIsoLevel = XACT_REPEATABLE_READ;
- XactReadOnly = true;
+ elog(ERROR, "cannot build an initial slot snapshot when MyPgXact->xmin already is valid");
snap = SnapBuildBuildSnapshot(builder, GetTopTransactionId());
if (test == NULL)
{
if (newxcnt >= GetMaxSnapshotXidCount())
- elog(ERROR, "snapshot too large");
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("initial slot snapshot too large")));
newxip[newxcnt++] = xid;
}
snap->xcnt = newxcnt;
snap->xip = newxip;
+ return snap;
+}
+
+/*
+ * Export a snapshot so it can be set in another session with SET TRANSACTION
+ * SNAPSHOT.
+ *
+ * For that we need to start a transaction in the current backend as the
+ * importing side checks whether the source transaction is still open to make
+ * sure the xmin horizon hasn't advanced since then.
+ */
+const char *
+SnapBuildExportSnapshot(SnapBuild *builder)
+{
+ Snapshot snap;
+ char *snapname;
+
+ if (IsTransactionOrTransactionBlock())
+ elog(ERROR, "cannot export a snapshot from within a transaction");
+
+ if (SavedResourceOwnerDuringExport)
+ elog(ERROR, "can only export one snapshot at a time");
+
+ SavedResourceOwnerDuringExport = CurrentResourceOwner;
+ ExportInProgress = true;
+
+ StartTransactionCommand();
+
+ /* There doesn't seem to be a nice API to set these */
+ XactIsoLevel = XACT_REPEATABLE_READ;
+ XactReadOnly = true;
+
+ snap = SnapBuildInitalSnapshot(builder);
+
/*
- * now that we've built a plain snapshot, use the normal mechanisms for
- * exporting it
+ * now that we've built a plain snapshot, make it active and use the
+ * normal mechanisms for exporting it
*/
snapname = ExportSnapshot(snap);
--- /dev/null
+/*-------------------------------------------------------------------------
+ * tablesync.c
+ * PostgreSQL logical replication
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/tablesync.c
+ *
+ * NOTES
+ * This file contains code for initial table data synchronization for
+ * logical replication.
+ *
+ * The initial data synchronization is done separately for each table,
+ * in separate apply worker that only fetches the initial snapshot data
+ * from the publisher and then synchronizes the position in stream with
+ * the main apply worker.
+ *
+ * There are several reasons for doing the synchronization this way:
+ * - It allows us to parallelize the initial data synchronization
+ * which lowers the time needed for it to happen.
+ * - The initial synchronization does not have to hold the xid and LSN
+ * for the time it takes to copy data of all tables, causing less
+ * bloat and lower disk consumption compared to doing the
+ * synchronization in single process for whole database.
+ * - It allows us to synchronize the tables added after the initial
+ * synchronization has finished.
+ *
+ * The stream position synchronization works in multiple steps.
+ * - Sync finishes copy and sets table state as SYNCWAIT and waits
+ * for state to change in a loop.
+ * - Apply periodically checks tables that are synchronizing for SYNCWAIT.
+ * When the desired state appears it will compare its position in the
+ * stream with the SYNCWAIT position and based on that change the
+ * state according to the following rules:
+ * - if the apply is in front of the sync in the wal stream the new
+ * state is set to CATCHUP and apply loops until the sync process
+ * catches up to the same LSN as apply
+ * - if the sync is in front of the apply in the wal stream the new
+ * state is set to SYNCDONE
+ * - if both apply and sync are at the same position in the wal stream
+ * the state of the table is set to READY
+ * - If the state was set to CATCHUP sync will read the stream and
+ * apply changes until it catches up to the specified stream
+ * position and then sets state to READY and signals apply that it
+ * can stop waiting and exits, if the state was set to something
+ * else than CATCHUP the sync process will simply end.
+ * - If the state was set to SYNCDONE by apply, the apply will
+ * continue tracking the table until it reaches the SYNCDONE stream
+ * position at which point it sets state to READY and stops tracking.
+ *
+ * The catalog pg_subscription_rel is used to keep information about
+ * subscribed tables and their state and some transient state during
+ * data synchronization is kept in shared memory.
+ *
+ * Example flows look like this:
+ * - Apply is in front:
+ * sync:8
+ * -> set SYNCWAIT
+ * apply:10
+ * -> set CATCHUP
+ * -> enter wait-loop
+ * sync:10
+ * -> set READY
+ * -> exit
+ * apply:10
+ * -> exit wait-loop
+ * -> continue rep
+ * - Sync in front:
+ * sync:10
+ * -> set SYNCWAIT
+ * apply:8
+ * -> set SYNCDONE
+ * -> continue per-table filtering
+ * sync:10
+ * -> exit
+ * apply:10
+ * -> set READY
+ * -> stop per-table filtering
+ * -> continue rep
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "pgstat.h"
+
+#include "access/xact.h"
+
+#include "catalog/pg_subscription_rel.h"
+#include "catalog/pg_type.h"
+
+#include "commands/copy.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/logicalrelation.h"
+#include "replication/walreceiver.h"
+#include "replication/worker_internal.h"
+
+#include "storage/ipc.h"
+
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+
+static bool table_states_valid = false;
+
+StringInfo copybuf = NULL;
+
+/*
+ * Exit routine for synchronization worker.
+ *
+ * Commits a transaction if one is open, flushes WAL up to the current write
+ * position, wakes up the worker registered for this subscription with
+ * InvalidOid as relation (the main apply worker), logs completion,
+ * disconnects from the publisher and exits the process with status 0.
+ * This function never returns.
+ */
+static void pg_attribute_noreturn()
+finish_sync_worker(void)
+{
+ /* Commit any outstanding transaction. */
+ if (IsTransactionState())
+ CommitTransactionCommand();
+
+ /* And flush all writes. */
+ XLogFlush(GetXLogWriteRecPtr());
+
+ /* Find the main apply worker and signal it. */
+ logicalrep_worker_wakeup(MyLogicalRepWorker->subid, InvalidOid);
+
+ ereport(LOG,
+ (errmsg("logical replication synchronization worker finished processing")));
+
+ /* Stop gracefully */
+ walrcv_disconnect(wrconn);
+ proc_exit(0);
+}
+
+/*
+ * Wait until the table synchronization state changes.
+ *
+ * Repeatedly looks up the worker registered for (our subscription, relid)
+ * and compares its relstate with origstate, sleeping on the process latch
+ * (timeout 10000, i.e. 10 seconds in WaitLatch's millisecond unit) between
+ * checks.
+ *
+ * Returns true as soon as the observed state differs from origstate.
+ * Returns false if no such worker is registered, if the observed state is
+ * SUBREL_STATE_UNKNOWN, or if the loop was left because got_SIGTERM is set.
+ */
+static bool
+wait_for_sync_status_change(Oid relid, char origstate)
+{
+ int rc;
+ char state = origstate;
+
+ while (!got_SIGTERM)
+ {
+ LogicalRepWorker *worker;
+
+ /* The worker array is only looked at while holding the lock. */
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ worker = logicalrep_worker_find(MyLogicalRepWorker->subid,
+ relid, false);
+ if (!worker)
+ {
+ LWLockRelease(LogicalRepWorkerLock);
+ return false;
+ }
+ /* Copy the state out so it can be examined after releasing the lock. */
+ state = worker->relstate;
+ LWLockRelease(LogicalRepWorkerLock);
+
+ if (state == SUBREL_STATE_UNKNOWN)
+ return false;
+
+ if (state != origstate)
+ return true;
+
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 10000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE);
+
+ /* emergency bailout if postmaster has died */
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+ }
+
+ /* Only reached when got_SIGTERM was set. */
+ return false;
+}
+
+/*
+ * Callback from syscache invalidation.
+ *
+ * Marks the cached list of not-ready tables as stale, so that
+ * process_syncing_tables_for_apply() rebuilds it on its next call. None of
+ * the callback arguments are used.
+ */
+void
+invalidate_syncing_table_states(Datum arg, int cacheid, uint32 hashvalue)
+{
+ table_states_valid = false;
+}
+
+/*
+ * Handle table synchronization cooperation from the synchronization
+ * worker.
+ *
+ * If the sync worker is in catch up mode and reached the predetermined
+ * synchronization point in the WAL stream, mark the table as READY and
+ * finish. If it caught up too far, set to SYNCDONE and finish. Things will
+ * then proceed in the "sync in front" scenario.
+ *
+ * current_lsn is the position in the stream the sync worker has reached.
+ * Must be called inside a transaction. When the catch-up target has been
+ * reached this function does not return, because finish_sync_worker()
+ * exits the process.
+ */
+static void
+process_syncing_tables_for_sync(XLogRecPtr current_lsn)
+{
+ Assert(IsTransactionState());
+
+ SpinLockAcquire(&MyLogicalRepWorker->relmutex);
+
+ if (MyLogicalRepWorker->relstate == SUBREL_STATE_CATCHUP &&
+ current_lsn >= MyLogicalRepWorker->relstate_lsn)
+ {
+ TimeLineID tli;
+
+ /* Exactly at the target means READY, past it means SYNCDONE. */
+ MyLogicalRepWorker->relstate =
+ (current_lsn == MyLogicalRepWorker->relstate_lsn)
+ ? SUBREL_STATE_READY
+ : SUBREL_STATE_SYNCDONE;
+ MyLogicalRepWorker->relstate_lsn = current_lsn;
+
+ /* Drop the spinlock before doing the catalog update below. */
+ SpinLockRelease(&MyLogicalRepWorker->relmutex);
+
+ /* Store the new state in the catalog. */
+ SetSubscriptionRelState(MyLogicalRepWorker->subid,
+ MyLogicalRepWorker->relid,
+ MyLogicalRepWorker->relstate,
+ MyLogicalRepWorker->relstate_lsn);
+
+ walrcv_endstreaming(wrconn, &tli);
+ finish_sync_worker();
+ }
+ else
+ SpinLockRelease(&MyLogicalRepWorker->relmutex);
+}
+
+/*
+ * Handle table synchronization cooperation from the apply worker.
+ *
+ * Walk over all subscription tables that are individually tracked by the
+ * apply process (currently, all that have state other than
+ * SUBREL_STATE_READY) and manage synchronization for them.
+ *
+ * If there are tables that need synchronizing and are not being synchronized
+ * yet, start sync workers for them (if there are free slots for sync
+ * workers).
+ *
+ * For tables that are being synchronized already, check if sync workers
+ * either need action from the apply worker or have finished.
+ *
+ * The usual scenario is that the apply got ahead of the sync while the sync
+ * ran, and then the action needed by apply is to mark a table for CATCHUP and
+ * wait for the catchup to happen. In the less common case that sync worker
+ * got in front of the apply worker, the table is marked as SYNCDONE but not
+ * ready yet, as it needs to be tracked until apply reaches the same position
+ * to which it was synced.
+ *
+ * If the synchronization position is reached, then the table can be marked as
+ * READY and is no longer tracked.
+ *
+ * current_lsn is the position the apply worker has reached in the stream.
+ * Must be called outside of a transaction; transactions are started and
+ * committed here as needed for catalog access.
+ */
+static void
+process_syncing_tables_for_apply(XLogRecPtr current_lsn)
+{
+ /* Cached copy of the not-ready tables; survives across calls. */
+ static List *table_states = NIL;
+ ListCell *lc;
+
+ Assert(!IsTransactionState());
+
+ /* We need up to date sync state info for subscription tables here. */
+ if (!table_states_valid)
+ {
+ MemoryContext oldctx;
+ List *rstates;
+ /* NOTE(review): this declaration shadows the function-level lc. */
+ ListCell *lc;
+ SubscriptionRelState *rstate;
+
+ /* Clean the old list. */
+ list_free_deep(table_states);
+ table_states = NIL;
+
+ StartTransactionCommand();
+
+ /* Fetch all non-ready tables. */
+ rstates = GetSubscriptionNotReadyRelations(MySubscription->oid);
+
+ /* Allocate the tracking info in a permanent memory context. */
+ oldctx = MemoryContextSwitchTo(CacheMemoryContext);
+ foreach(lc, rstates)
+ {
+ rstate = palloc(sizeof(SubscriptionRelState));
+ memcpy(rstate, lfirst(lc), sizeof(SubscriptionRelState));
+ table_states = lappend(table_states, rstate);
+ }
+ MemoryContextSwitchTo(oldctx);
+
+ CommitTransactionCommand();
+
+ table_states_valid = true;
+ }
+
+ /* Process all tables that are being synchronized. */
+ foreach(lc, table_states)
+ {
+ SubscriptionRelState *rstate = (SubscriptionRelState *)lfirst(lc);
+
+ if (rstate->state == SUBREL_STATE_SYNCDONE)
+ {
+ /*
+ * Apply has caught up to the position where the table sync
+ * has finished. Time to mark the table as ready so that
+ * apply will just continue to replicate it normally.
+ */
+ if (current_lsn >= rstate->lsn)
+ {
+ rstate->state = SUBREL_STATE_READY;
+ rstate->lsn = current_lsn;
+ StartTransactionCommand();
+ SetSubscriptionRelState(MyLogicalRepWorker->subid,
+ rstate->relid, rstate->state,
+ rstate->lsn);
+ CommitTransactionCommand();
+ }
+ }
+ else
+ {
+ LogicalRepWorker *syncworker;
+ int nsyncworkers = 0;
+
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ syncworker = logicalrep_worker_find(MyLogicalRepWorker->subid,
+ rstate->relid, false);
+ if (syncworker)
+ {
+ /* Refresh our cached entry from the sync worker's shared state. */
+ SpinLockAcquire(&syncworker->relmutex);
+ rstate->state = syncworker->relstate;
+ rstate->lsn = syncworker->relstate_lsn;
+ SpinLockRelease(&syncworker->relmutex);
+ }
+ else
+ /*
+ * If there is no sync worker for this table yet, count the
+ * running sync workers for this subscription while we have
+ * the lock; the count is used below to decide whether a new
+ * sync worker may be launched.
+ */
+ nsyncworkers = logicalrep_sync_worker_count(MyLogicalRepWorker->subid);
+ LWLockRelease(LogicalRepWorkerLock);
+
+ /*
+ * There is a worker synchronizing the relation and waiting for
+ * apply to do something.
+ */
+ if (syncworker && rstate->state == SUBREL_STATE_SYNCWAIT)
+ {
+ /*
+ * There are three possible synchronization situations here.
+ *
+ * a) Apply is in front of the table sync: We tell the table
+ * sync to CATCHUP.
+ *
+ * b) Apply is behind the table sync: We tell the table sync
+ * to mark the table as SYNCDONE and finish.
+ *
+ * c) Apply and table sync are at the same position: We tell
+ * table sync to mark the table as READY and finish.
+ *
+ * In any case we'll need to wait for table sync to change
+ * the state in catalog and only then continue ourselves.
+ */
+ if (current_lsn > rstate->lsn)
+ {
+ rstate->state = SUBREL_STATE_CATCHUP;
+ rstate->lsn = current_lsn;
+ }
+ else if (current_lsn == rstate->lsn)
+ {
+ rstate->state = SUBREL_STATE_READY;
+ rstate->lsn = current_lsn;
+ }
+ else
+ rstate->state = SUBREL_STATE_SYNCDONE;
+
+ /* Publish the decision in the sync worker's shared state. */
+ SpinLockAcquire(&syncworker->relmutex);
+ syncworker->relstate = rstate->state;
+ syncworker->relstate_lsn = rstate->lsn;
+ SpinLockRelease(&syncworker->relmutex);
+
+ /* Signal the sync worker, as it may be waiting for us. */
+ logicalrep_worker_wakeup_ptr(syncworker);
+
+ /*
+ * Enter busy loop and wait for synchronization status
+ * change.
+ */
+ wait_for_sync_status_change(rstate->relid, rstate->state);
+ }
+
+ /*
+ * If there is no sync worker registered for the table and
+ * there is some free sync worker slot, start new sync worker
+ * for the table.
+ */
+ else if (!syncworker && nsyncworkers < max_sync_workers_per_subscription)
+ {
+ logicalrep_worker_launch(MyLogicalRepWorker->dbid,
+ MySubscription->oid,
+ MySubscription->name,
+ MyLogicalRepWorker->userid,
+ rstate->relid);
+ }
+ }
+ }
+}
+
+/*
+ * Entry point for handling table synchronization state changes.
+ *
+ * Dispatches to the sync-worker or apply-worker variant depending on which
+ * kind of worker the current process is.
+ */
+void
+process_syncing_tables(XLogRecPtr current_lsn)
+{
+	if (!am_tablesync_worker())
+	{
+		process_syncing_tables_for_apply(current_lsn);
+		return;
+	}
+
+	process_syncing_tables_for_sync(current_lsn);
+}
+
+/*
+ * Build the column name list handed to COPY from the logical relation
+ * mapping.
+ *
+ * For every local column that is neither dropped nor unmapped on the remote
+ * side, the name of the corresponding remote column is appended, in local
+ * attribute order.
+ */
+static List *
+make_copy_attnamelist(LogicalRepRelMapEntry *rel)
+{
+	TupleDesc	tupdesc = RelationGetDescr(rel->localrel);
+	List	   *names = NIL;
+	int			attidx;
+
+	for (attidx = 0; attidx < tupdesc->natts; attidx++)
+	{
+		int			remoteidx = rel->attrmap[attidx];
+
+		/* Keep only live local columns that also exist on the remote side. */
+		if (!tupdesc->attrs[attidx]->attisdropped && remoteidx >= 0)
+			names = lappend(names,
+							makeString(rel->remoterel.attnames[remoteidx]));
+	}
+
+	return names;
+}
+
+/*
+ * Data source callback for the COPY FROM, which reads from the remote
+ * connection and passes the data back to our local COPY.
+ *
+ * Copies between minread and maxread bytes into outbuf and returns the
+ * number of bytes stored.  Fewer than minread bytes are returned only when
+ * walrcv_receive() reports a negative length (presumably the end of the
+ * remote COPY stream).
+ *
+ * Data that was received but did not fit into outbuf stays in the global
+ * copybuf and is handed out first on the next call.
+ */
+static int
+copy_read_data(void *outbuf, int minread, int maxread)
+{
+	int			bytesread = 0;
+	int			avail;
+
+	/* If there are some leftover data from previous read, use them. */
+	avail = copybuf->len - copybuf->cursor;
+	if (avail)
+	{
+		if (avail > maxread)
+			avail = maxread;
+		memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
+
+		/*
+		 * Advance the destination pointer, otherwise data received below
+		 * would overwrite the leftover bytes just copied.
+		 */
+		outbuf = (void *) ((char *) outbuf + avail);
+		copybuf->cursor += avail;
+		maxread -= avail;
+		bytesread += avail;
+	}
+
+	while (!got_SIGTERM && maxread > 0 && bytesread < minread)
+	{
+		pgsocket	fd = PGINVALID_SOCKET;
+		int			rc;
+		int			len;
+		char	   *buf = NULL;
+
+		for (;;)
+		{
+			/* Try read the data. */
+			len = walrcv_receive(wrconn, &buf, &fd);
+
+			CHECK_FOR_INTERRUPTS();
+
+			if (len == 0)
+				break;			/* nothing available right now, go wait */
+			else if (len < 0)
+				return bytesread;
+			else
+			{
+				/* Process the data */
+				copybuf->data = buf;
+				copybuf->len = len;
+				copybuf->cursor = 0;
+
+				avail = copybuf->len - copybuf->cursor;
+				if (avail > maxread)
+					avail = maxread;
+				memcpy(outbuf, &copybuf->data[copybuf->cursor], avail);
+				outbuf = (void *) ((char *) outbuf + avail);
+				copybuf->cursor += avail;
+				maxread -= avail;
+				bytesread += avail;
+			}
+
+			if (maxread <= 0 || bytesread >= minread)
+				return bytesread;
+		}
+
+		/*
+		 * Wait for more data or latch.
+		 */
+		rc = WaitLatchOrSocket(&MyProc->procLatch,
+							   WL_SOCKET_READABLE | WL_LATCH_SET |
+							   WL_TIMEOUT | WL_POSTMASTER_DEATH,
+							   fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA);
+
+		/* Emergency bailout if postmaster has died */
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		ResetLatch(&MyProc->procLatch);
+	}
+
+	/* Check for exit condition. */
+	if (got_SIGTERM)
+		proc_exit(0);
+
+	return bytesread;
+}
+
+
+/*
+ * Get information about remote relation in similar fashion the RELATION
+ * message provides during replication.
+ *
+ * Runs two queries over the replication connection: the first looks up the
+ * OID and replica identity of the table nspname.relname, the second fetches
+ * its non-dropped user columns in attnum order.  The results are stored in
+ * *lrel.  nspname and relname are stored by pointer, not copied; attnames
+ * and atttyps are palloc'd in the current memory context.
+ *
+ * Raises an ERROR if either query fails or the table does not exist on the
+ * publisher.
+ */
+static void
+fetch_remote_table_info(char *nspname, char *relname,
+						LogicalRepRelation *lrel)
+{
+	WalRcvExecResult *res;
+	StringInfoData cmd;
+	TupleTableSlot *slot;
+	Oid			tableRow[2] = {OIDOID, CHAROID};
+	Oid			attrRow[4] = {TEXTOID, OIDOID, INT4OID, BOOLOID};
+	bool		isnull;
+	int			natt;
+
+	lrel->nspname = nspname;
+	lrel->relname = relname;
+
+	/*
+	 * First fetch Oid and replica identity.
+	 *
+	 * The join condition on relnamespace is essential: without it any table
+	 * called relname in any schema would match, as long as a schema called
+	 * nspname exists.
+	 */
+	initStringInfo(&cmd);
+	appendStringInfo(&cmd, "SELECT c.oid, c.relreplident"
+					 " FROM pg_catalog.pg_class c,"
+					 " pg_catalog.pg_namespace n"
+					 " WHERE n.nspname = %s"
+					 " AND c.relnamespace = n.oid"
+					 " AND c.relname = %s"
+					 " AND c.relkind = 'r'",
+					 quote_literal_cstr(nspname),
+					 quote_literal_cstr(relname));
+	res = walrcv_exec(wrconn, cmd.data, 2, tableRow);
+
+	if (res->status != WALRCV_OK_TUPLES)
+		ereport(ERROR,
+				(errmsg("could not fetch table info for table \"%s.%s\" from publisher: %s",
+						nspname, relname, res->err)));
+
+	slot = MakeSingleTupleTableSlot(res->tupledesc);
+	if (!tuplestore_gettupleslot(res->tuplestore, true, false, slot))
+		ereport(ERROR,
+				(errmsg("table \"%s.%s\" not found on publisher",
+						nspname, relname)));
+
+	lrel->remoteid = DatumGetObjectId(slot_getattr(slot, 1, &isnull));
+	Assert(!isnull);
+	lrel->replident = DatumGetChar(slot_getattr(slot, 2, &isnull));
+	Assert(!isnull);
+
+	ExecDropSingleTupleTableSlot(slot);
+	walrcv_clear_result(res);
+
+	/*
+	 * Now fetch columns.  The fourth output column tells whether the
+	 * attribute is part of the replica identity index; the third one
+	 * (atttypmod) is fetched but not used below.
+	 */
+	resetStringInfo(&cmd);
+	appendStringInfo(&cmd,
+					 "SELECT a.attname,"
+					 " a.atttypid,"
+					 " a.atttypmod,"
+					 " a.attnum = ANY(i.indkey)"
+					 " FROM pg_catalog.pg_attribute a"
+					 " LEFT JOIN pg_catalog.pg_index i"
+					 " ON (i.indexrelid = pg_get_replica_identity_index(%u))"
+					 " WHERE a.attnum > 0::pg_catalog.int2"
+					 " AND NOT a.attisdropped"
+					 " AND a.attrelid = %u"
+					 " ORDER BY a.attnum",
+					 lrel->remoteid, lrel->remoteid);
+	res = walrcv_exec(wrconn, cmd.data, 4, attrRow);
+
+	if (res->status != WALRCV_OK_TUPLES)
+		ereport(ERROR,
+				(errmsg("could not fetch table info for table \"%s.%s\": %s",
+						nspname, relname, res->err)));
+
+	/* We don't know number of rows coming, so allocate enough space. */
+	lrel->attnames = palloc0(MaxTupleAttributeNumber * sizeof(char *));
+	lrel->atttyps = palloc0(MaxTupleAttributeNumber * sizeof(Oid));
+	lrel->attkeys = NULL;
+
+	natt = 0;
+	slot = MakeSingleTupleTableSlot(res->tupledesc);
+	while (tuplestore_gettupleslot(res->tuplestore, true, false, slot))
+	{
+		lrel->attnames[natt] =
+			pstrdup(TextDatumGetCString(slot_getattr(slot, 1, &isnull)));
+		Assert(!isnull);
+		lrel->atttyps[natt] = DatumGetObjectId(slot_getattr(slot, 2, &isnull));
+		Assert(!isnull);
+
+		/*
+		 * A NULL here (no replica identity index matched by the LEFT JOIN)
+		 * is treated like false.
+		 */
+		if (DatumGetBool(slot_getattr(slot, 4, &isnull)))
+			lrel->attkeys = bms_add_member(lrel->attkeys, natt);
+
+		/* Should never happen. */
+		if (++natt >= MaxTupleAttributeNumber)
+			elog(ERROR, "too many columns in remote table \"%s.%s\"",
+				 nspname, relname);
+
+		ExecClearTuple(slot);
+	}
+	ExecDropSingleTupleTableSlot(slot);
+
+	lrel->natts = natt;
+
+	walrcv_clear_result(res);
+	pfree(cmd.data);
+}
+
+/*
+ * Copy existing data of a table from publisher.
+ *
+ * Fetches the publisher's description of the table that has the same
+ * schema and name as rel, registers it in the relation map, starts
+ * "COPY ... TO STDOUT" on the publisher and feeds the incoming data into a
+ * local COPY FROM on rel via copy_read_data().
+ *
+ * Caller is responsible for locking the local relation.
+ */
+static void
+copy_table(Relation rel)
+{
+ LogicalRepRelMapEntry *relmapentry;
+ LogicalRepRelation lrel;
+ WalRcvExecResult *res;
+ StringInfoData cmd;
+ CopyState cstate;
+ List *attnamelist;
+
+ /* Get the publisher relation info. */
+ fetch_remote_table_info(get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel), &lrel);
+
+ /* Put the relation into relmap. */
+ logicalrep_relmap_update(&lrel);
+
+ /* Map the publisher relation to local one. */
+ /* NoLock: the caller already holds a lock on the local relation. */
+ relmapentry = logicalrep_rel_open(lrel.remoteid, NoLock);
+ Assert(rel == relmapentry->localrel);
+
+ /* Start copy on the publisher. */
+ initStringInfo(&cmd);
+ appendStringInfo(&cmd, "COPY %s TO STDOUT",
+ quote_qualified_identifier(lrel.nspname, lrel.relname));
+ res = walrcv_exec(wrconn, cmd.data, 0, NULL);
+ pfree(cmd.data);
+ if (res->status != WALRCV_OK_COPY_OUT)
+ ereport(ERROR,
+ (errmsg("could not start initial contents copy for table \"%s.%s\": %s",
+ lrel.nspname, lrel.relname, res->err)));
+ walrcv_clear_result(res);
+
+ /* Fresh buffer for data that copy_read_data() could not hand out yet. */
+ copybuf = makeStringInfo();
+
+ /* Create CopyState for ingestion of the data from publisher. */
+ attnamelist = make_copy_attnamelist(relmapentry);
+ cstate = BeginCopyFrom(NULL, rel, NULL, false, copy_read_data, attnamelist, NIL);
+
+ /* Do the copy */
+ (void) CopyFrom(cstate);
+
+ logicalrep_rel_close(relmapentry, NoLock);
+}
+
+/*
+ * Start syncing the table in the sync worker.
+ *
+ * Reads the table's synchronization state from the catalog, connects to the
+ * publisher and, for a table in INIT or DATASYNC state, copies its data
+ * using a temporary slot and then waits for the apply worker's decision.
+ *
+ * This function returns only when the apply worker requested CATCHUP; in
+ * all other non-error cases the worker exits via finish_sync_worker().
+ * An unknown state raises an ERROR.
+ *
+ * origin_startpos is passed to walrcv_create_slot() and is presumably set by
+ * it to the position at which the new slot starts decoding.
+ *
+ * The returned slot name is palloced in current memory context.
+ */
+char *
+LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
+{
+ char *slotname;
+ char *err;
+
+ /* Check the state of the table synchronization. */
+ StartTransactionCommand();
+ SpinLockAcquire(&MyLogicalRepWorker->relmutex);
+ MyLogicalRepWorker->relstate =
+ GetSubscriptionRelState(MyLogicalRepWorker->subid,
+ MyLogicalRepWorker->relid,
+ &MyLogicalRepWorker->relstate_lsn,
+ false);
+ SpinLockRelease(&MyLogicalRepWorker->relmutex);
+ CommitTransactionCommand();
+
+ /*
+ * To build a slot name for the sync work, we are limited to NAMEDATALEN -
+ * 1 characters. We cut the original slot name to NAMEDATALEN - 28 chars
+ * and append _%u_sync_%u (1 + 10 + 6 + 10 + '\0'). (It's actually the
+ * NAMEDATALEN on the remote that matters, but this scheme will also work
+ * reasonably if that is different.)
+ */
+ StaticAssertStmt(NAMEDATALEN >= 32, "NAMEDATALEN too small"); /* for sanity */
+ slotname = psprintf("%.*s_%u_sync_%u",
+ NAMEDATALEN - 28,
+ MySubscription->slotname,
+ MySubscription->oid,
+ MyLogicalRepWorker->relid);
+
+ wrconn = walrcv_connect(MySubscription->conninfo, true, slotname, &err);
+ if (wrconn == NULL)
+ ereport(ERROR,
+ (errmsg("could not connect to the publisher: %s", err)));
+
+ switch (MyLogicalRepWorker->relstate)
+ {
+ case SUBREL_STATE_INIT:
+ case SUBREL_STATE_DATASYNC:
+ {
+ Relation rel;
+ WalRcvExecResult *res;
+
+ SpinLockAcquire(&MyLogicalRepWorker->relmutex);
+ MyLogicalRepWorker->relstate = SUBREL_STATE_DATASYNC;
+ MyLogicalRepWorker->relstate_lsn = InvalidXLogRecPtr;
+ SpinLockRelease(&MyLogicalRepWorker->relmutex);
+
+ /* Update the state and make it visible to others. */
+ StartTransactionCommand();
+ SetSubscriptionRelState(MyLogicalRepWorker->subid,
+ MyLogicalRepWorker->relid,
+ MyLogicalRepWorker->relstate,
+ MyLogicalRepWorker->relstate_lsn);
+ CommitTransactionCommand();
+
+ /*
+ * We want to do the table data sync in single
+ * transaction.
+ */
+ StartTransactionCommand();
+
+ /*
+ * Use standard write lock here. It might be better to
+ * disallow access to table while it's being synchronized.
+ * But we don't want to block the main apply process from
+ * working and it has to open relation in RowExclusiveLock
+ * when remapping remote relation id to local one.
+ */
+ rel = heap_open(MyLogicalRepWorker->relid, RowExclusiveLock);
+
+ /*
+ * Create temporary slot for the sync process.
+ * We do this inside transaction so that we can use the
+ * snapshot made by the slot to get existing data.
+ */
+ res = walrcv_exec(wrconn,
+ "BEGIN READ ONLY ISOLATION LEVEL "
+ "REPEATABLE READ", 0, NULL);
+ if (res->status != WALRCV_OK_COMMAND)
+ ereport(ERROR,
+ (errmsg("table copy could not start transaction on publisher"),
+ errdetail("The error was: %s", res->err)));
+ walrcv_clear_result(res);
+
+ /*
+ * Create new temporary logical decoding slot.
+ *
+ * We'll use slot for data copy so make sure the snapshot
+ * is used for the transaction, that way the COPY will get
+ * data that is consistent with the lsn used by the slot
+ * to start decoding.
+ */
+ walrcv_create_slot(wrconn, slotname, true,
+ CRS_USE_SNAPSHOT, origin_startpos);
+
+ copy_table(rel);
+
+ res = walrcv_exec(wrconn, "COMMIT", 0, NULL);
+ if (res->status != WALRCV_OK_COMMAND)
+ ereport(ERROR,
+ (errmsg("table copy could not finish transaction on publisher"),
+ errdetail("The error was: %s", res->err)));
+ walrcv_clear_result(res);
+
+ /* Close the relation but keep the lock (NoLock). */
+ heap_close(rel, NoLock);
+
+ /* Make the copy visible. */
+ CommandCounterIncrement();
+
+ /*
+ * We are done with the initial data synchronization,
+ * update the state.
+ */
+ SpinLockAcquire(&MyLogicalRepWorker->relmutex);
+ MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCWAIT;
+ MyLogicalRepWorker->relstate_lsn = *origin_startpos;
+ SpinLockRelease(&MyLogicalRepWorker->relmutex);
+
+ /*
+ * Wait for main apply worker to either tell us to
+ * catchup or that we are done.
+ */
+ /* NOTE(review): the return value of the wait is not checked here. */
+ wait_for_sync_status_change(MyLogicalRepWorker->relid,
+ MyLogicalRepWorker->relstate);
+ if (MyLogicalRepWorker->relstate != SUBREL_STATE_CATCHUP)
+ {
+ /* Update the new state. */
+ SetSubscriptionRelState(MyLogicalRepWorker->subid,
+ MyLogicalRepWorker->relid,
+ MyLogicalRepWorker->relstate,
+ MyLogicalRepWorker->relstate_lsn);
+ finish_sync_worker();
+ }
+ break;
+ }
+ case SUBREL_STATE_SYNCDONE:
+ case SUBREL_STATE_READY:
+ /* Nothing to do here but finish. */
+ finish_sync_worker();
+ break;
+ default:
+ elog(ERROR, "unknown relation state \"%c\"",
+ MyLogicalRepWorker->relstate);
+ }
+
+ return slotname;
+}
#include "catalog/namespace.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
#include "commands/trigger.h"
} SlotErrCallbackArg;
static MemoryContext ApplyContext = NULL;
-static MemoryContext ApplyCacheContext = NULL;
+MemoryContext ApplyCacheContext = NULL;
WalReceiverConn *wrconn = NULL;
bool MySubscriptionValid = false;
bool in_remote_transaction = false;
+static XLogRecPtr remote_final_lsn = InvalidXLogRecPtr;
static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
static void reread_subscription(void);
+/*
+ * Decide whether this worker should apply changes for the given relation.
+ *
+ * Initial table data sync runs in separate worker processes in parallel
+ * with the main apply worker, so the apply worker has to skip changes for
+ * tables that are still being synchronized, and a sync worker has to skip
+ * everything except its own table.
+ *
+ * For the SYNCDONE state a smaller-or-equal comparison is needed, because
+ * the stored position may be the end of the initial slot consistent point
+ * WAL record + 1 (i.e. the start of the next record), and that next record
+ * can be the COMMIT of the transaction currently being processed (which is
+ * what remote_final_lsn is set to in apply_handle_begin).
+ */
+static bool
+should_apply_changes_for_rel(LogicalRepRelMapEntry *rel)
+{
+	/* A sync worker only handles the single table it was started for. */
+	if (am_tablesync_worker())
+		return MyLogicalRepWorker->relid == rel->localreloid;
+
+	if (rel->state == SUBREL_STATE_READY)
+		return true;
+
+	return rel->state == SUBREL_STATE_SYNCDONE &&
+		rel->statelsn <= remote_final_lsn;
+}
+
/*
* Make sure that we started local transaction.
*
replorigin_session_origin_timestamp = begin_data.committime;
replorigin_session_origin_lsn = begin_data.final_lsn;
+ remote_final_lsn = begin_data.final_lsn;
+
in_remote_transaction = true;
pgstat_report_activity(STATE_RUNNING, NULL);
Assert(commit_data.commit_lsn == replorigin_session_origin_lsn);
Assert(commit_data.committime == replorigin_session_origin_timestamp);
- if (IsTransactionState())
+ Assert(commit_data.commit_lsn == remote_final_lsn);
+
+ /* The synchronization worker runs in single transaction. */
+ if (IsTransactionState() && !am_tablesync_worker())
{
CommitTransactionCommand();
in_remote_transaction = false;
+ /* Process any tables that are being synchronized in parallel. */
+ process_syncing_tables(commit_data.end_lsn);
+
pgstat_report_activity(STATE_IDLE, NULL);
}
* ORIGIN message can only come inside remote transaction and before
* any actual writes.
*/
- if (!in_remote_transaction || IsTransactionState())
+ if (!in_remote_transaction ||
+ (IsTransactionState() && !am_tablesync_worker()))
ereport(ERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
errmsg("ORIGIN message sent out of order")));
relid = logicalrep_read_insert(s, &newtup);
rel = logicalrep_rel_open(relid, RowExclusiveLock);
+ if (!should_apply_changes_for_rel(rel))
+ {
+ /*
+ * The relation can't become interesting in the middle of the
+ * transaction so it's safe to unlock it.
+ */
+ logicalrep_rel_close(rel, RowExclusiveLock);
+ return;
+ }
/* Initialize the executor state. */
estate = create_estate_for_relation(rel);
relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
&newtup);
rel = logicalrep_rel_open(relid, RowExclusiveLock);
+ if (!should_apply_changes_for_rel(rel))
+ {
+ /*
+ * The relation can't become interesting in the middle of the
+ * transaction so it's safe to unlock it.
+ */
+ logicalrep_rel_close(rel, RowExclusiveLock);
+ return;
+ }
/* Check if we can do the update. */
check_relation_updatable(rel);
relid = logicalrep_read_delete(s, &oldtup);
rel = logicalrep_rel_open(relid, RowExclusiveLock);
+ if (!should_apply_changes_for_rel(rel))
+ {
+ /*
+ * The relation can't become interesting in the middle of the
+ * transaction so it's safe to unlock it.
+ */
+ logicalrep_rel_close(rel, RowExclusiveLock);
+ return;
+ }
/* Check if we can do the delete. */
check_relation_updatable(rel);
* Apply main loop.
*/
static void
-ApplyLoop(void)
+LogicalRepApplyLoop(XLogRecPtr last_received)
{
- XLogRecPtr last_received = InvalidXLogRecPtr;
-
/* Init the ApplyContext which we use for easier cleanup. */
ApplyContext = AllocSetContextCreate(TopMemoryContext,
"ApplyContext",
}
else if (c == 'k')
{
- XLogRecPtr endpos;
+ XLogRecPtr end_lsn;
TimestampTz timestamp;
bool reply_requested;
- endpos = pq_getmsgint64(&s);
+ end_lsn = pq_getmsgint64(&s);
timestamp = pq_getmsgint64(&s);
reply_requested = pq_getmsgbyte(&s);
- send_feedback(endpos, reply_requested, false);
+ if (last_received < end_lsn)
+ last_received = end_lsn;
+
+ send_feedback(last_received, reply_requested, false);
UpdateWorkerStats(last_received, timestamp, true);
}
/* other message types are purposefully ignored */
len = walrcv_receive(wrconn, &buf, &fd);
}
+
+ /* confirm all writes at once */
+ send_feedback(last_received, false, false);
}
if (!in_remote_transaction)
* If we didn't get any transactions for a while there might be
* unconsumed invalidation messages in the queue, consume them now.
*/
- StartTransactionCommand();
- /* Check for subscription change */
+ AcceptInvalidationMessages();
if (!MySubscriptionValid)
reread_subscription();
- CommitTransactionCommand();
- }
- /* confirm all writes at once */
- send_feedback(last_received, false, false);
+ /* Process any table synchronization changes. */
+ process_syncing_tables(last_received);
+ }
/* Cleanup the memory. */
MemoryContextResetAndDeleteChildren(ApplyContext);
/* Check if we need to exit the streaming loop. */
if (endofstream)
+ {
+ TimeLineID tli;
+ walrcv_endstreaming(wrconn, &tli);
break;
+ }
/*
* Wait for more data or latch.
{
MemoryContext oldctx;
Subscription *newsub;
+ bool started_tx = false;
+
+ /* This function might be called inside or outside of transaction. */
+ if (!IsTransactionState())
+ {
+ StartTransactionCommand();
+ started_tx = true;
+ }
/* Ensure allocations in permanent context. */
oldctx = MemoryContextSwitchTo(ApplyCacheContext);
MemoryContextSwitchTo(oldctx);
+ if (started_tx)
+ CommitTransactionCommand();
+
MySubscriptionValid = true;
}
int worker_slot = DatumGetObjectId(main_arg);
MemoryContext oldctx;
char originname[NAMEDATALEN];
- RepOriginId originid;
XLogRecPtr origin_startpos;
- char *err;
- int server_version;
- TimeLineID startpointTLI;
+ char *myslotname;
WalRcvStreamOptions options;
/* Attach to slot */
subscription_change_cb,
(Datum) 0);
- ereport(LOG,
- (errmsg("logical replication apply for subscription \"%s\" has started",
- MySubscription->name)));
-
- /* Setup replication origin tracking. */
- snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
- originid = replorigin_by_name(originname, true);
- if (!OidIsValid(originid))
- originid = replorigin_create(originname);
- replorigin_session_setup(originid);
- replorigin_session_origin = originid;
- origin_startpos = replorigin_session_get_progress(false);
+ if (am_tablesync_worker())
+ elog(LOG, "logical replication sync for subscription %s, table %s started",
+ MySubscription->name, get_rel_name(MyLogicalRepWorker->relid));
+ else
+ elog(LOG, "logical replication apply for subscription %s started",
+ MySubscription->name);
CommitTransactionCommand();
/* Connect to the origin and start the replication. */
elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
MySubscription->conninfo);
- wrconn = walrcv_connect(MySubscription->conninfo, true,
- MySubscription->name, &err);
- if (wrconn == NULL)
- ereport(ERROR,
- (errmsg("could not connect to the publisher: %s", err)));
+
+ if (am_tablesync_worker())
+ {
+ char *syncslotname;
+
+ /* This is table synchronization worker, call initial sync. */
+ syncslotname = LogicalRepSyncTableStart(&origin_startpos);
+
+ /* The slot name needs to be allocated in permanent memory context. */
+ oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+ myslotname = pstrdup(syncslotname);
+ MemoryContextSwitchTo(oldctx);
+
+ pfree(syncslotname);
+ }
+ else
+ {
+ /* This is main apply worker */
+ RepOriginId originid;
+ TimeLineID startpointTLI;
+ char *err;
+ int server_version;
+
+ myslotname = MySubscription->slotname;
+
+ /* Setup replication origin tracking. */
+ StartTransactionCommand();
+ snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
+ originid = replorigin_by_name(originname, true);
+ if (!OidIsValid(originid))
+ originid = replorigin_create(originname);
+ replorigin_session_setup(originid);
+ replorigin_session_origin = originid;
+ origin_startpos = replorigin_session_get_progress(false);
+ CommitTransactionCommand();
+
+ wrconn = walrcv_connect(MySubscription->conninfo, true, myslotname,
+ &err);
+ if (wrconn == NULL)
+ ereport(ERROR,
+ (errmsg("could not connect to the publisher: %s", err)));
+
+ /*
+ * We don't really use the output identify_system for anything
+ * but it does some initializations on the upstream so let's still
+ * call it.
+ */
+ (void) walrcv_identify_system(wrconn, &startpointTLI,
+ &server_version);
+
+ }
/*
- * We don't really use the output identify_system for anything
- * but it does some initializations on the upstream so let's still
- * call it.
+ * Setup callback for syscache so that we know when something
+ * changes in the subscription relation state.
*/
- (void) walrcv_identify_system(wrconn, &startpointTLI, &server_version);
+ CacheRegisterSyscacheCallback(SUBSCRIPTIONRELMAP,
+ invalidate_syncing_table_states,
+ (Datum) 0);
/* Build logical replication streaming options. */
options.logical = true;
options.startpoint = origin_startpos;
- options.slotname = MySubscription->slotname;
+ options.slotname = myslotname;
options.proto.logical.proto_version = LOGICALREP_PROTO_VERSION_NUM;
options.proto.logical.publication_names = MySubscription->publications;
- /* Start streaming from the slot. */
+ /* Start normal logical streaming replication. */
walrcv_startstreaming(wrconn, &options);
/* Run the main loop. */
- ApplyLoop();
+ LogicalRepApplyLoop(origin_startpos);
walrcv_disconnect(wrconn);
/* Result of the parsing is returned here */
Node *replication_parse_result;
+static SQLCmd *make_sqlcmd(void);
+
/*
* Bison doesn't allocate anything that needs to live across parser calls,
%token <str> SCONST IDENT
%token <uintval> UCONST
%token <recptr> RECPTR
+%token T_WORD
/* Keyword tokens. */
%token K_BASE_BACKUP
%token K_TEMPORARY
%token K_EXPORT_SNAPSHOT
%token K_NOEXPORT_SNAPSHOT
+%token K_USE_SNAPSHOT
%type <node> command
%type <node> base_backup start_replication start_logical_replication
create_replication_slot drop_replication_slot identify_system
- timeline_history show
+ timeline_history show sql_cmd
%type <list> base_backup_opt_list
%type <defelt> base_backup_opt
%type <uintval> opt_timeline
| drop_replication_slot
| timeline_history
| show
+ | sql_cmd
;
/*
$$ = makeDefElem("export_snapshot",
(Node *)makeInteger(FALSE), -1);
}
+ | K_USE_SNAPSHOT
+ {
+ $$ = makeDefElem("use_snapshot",
+ (Node *)makeInteger(TRUE), -1);
+ }
| K_RESERVE_WAL
{
$$ = makeDefElem("reserve_wal",
SCONST { $$ = (Node *) makeString($1); }
| /* EMPTY */ { $$ = NULL; }
;
+
+sql_cmd:
+ IDENT { $$ = (Node *) make_sqlcmd(); }
+ ;
%%
+/*
+ * make_sqlcmd
+ *		Build the SQLCmd node returned by the sql_cmd grammar rule.
+ *
+ * The node carries no command text.  The rest of the input is read from the
+ * scanner and thrown away, stopping after a ';' token or at end of input.
+ */
+static SQLCmd *
+make_sqlcmd(void)
+{
+	SQLCmd	   *result = makeNode(SQLCmd);
+	int			token;
+
+	/* Just move lexer to the end of command. */
+	do
+	{
+		token = yylex();
+	} while (token != ';' && token != 0);
+
+	return result;
+}
+
#include "repl_scanner.c"
TEMPORARY { return K_TEMPORARY; }
EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; }
NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; }
+USE_SNAPSHOT { return K_USE_SNAPSHOT; }
"," { return ','; }
";" { return ';'; }
}
. {
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("syntax error: unexpected character \"%s\"", yytext)));
+ return T_WORD;
}
%%
static void
parseCreateReplSlotOptions(CreateReplicationSlotCmd *cmd,
bool *reserve_wal,
- bool *export_snapshot)
+ CRSSnapshotAction *snapshot_action)
{
ListCell *lc;
bool snapshot_action_given = false;
errmsg("conflicting or redundant options")));
snapshot_action_given = true;
- *export_snapshot = defGetBoolean(defel);
+ *snapshot_action = defGetBoolean(defel) ? CRS_EXPORT_SNAPSHOT :
+ CRS_NOEXPORT_SNAPSHOT;
+ }
+ else if (strcmp(defel->defname, "use_snapshot") == 0)
+ {
+ if (snapshot_action_given || cmd->kind != REPLICATION_KIND_LOGICAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ snapshot_action_given = true;
+ *snapshot_action = CRS_USE_SNAPSHOT;
}
else if (strcmp(defel->defname, "reserve_wal") == 0)
{
char xpos[MAXFNAMELEN];
char *slot_name;
bool reserve_wal = false;
- bool export_snapshot = true;
+ CRSSnapshotAction snapshot_action = CRS_EXPORT_SNAPSHOT;
DestReceiver *dest;
TupOutputState *tstate;
TupleDesc tupdesc;
Assert(!MyReplicationSlot);
- parseCreateReplSlotOptions(cmd, &reserve_wal, &export_snapshot);
+ parseCreateReplSlotOptions(cmd, &reserve_wal, &snapshot_action);
/* setup state for XLogReadPage */
sendTimeLineIsHistoric = false;
{
LogicalDecodingContext *ctx;
+ /*
+ * Check the options early so that we can bail out before calling
+ * DecodingContextFindStartpoint(), which can take a long time.
+ */
+ if (snapshot_action == CRS_EXPORT_SNAPSHOT)
+ {
+ if (IsTransactionBlock())
+ ereport(ERROR,
+ (errmsg("CREATE_REPLICATION_SLOT ... EXPORT_SNAPSHOT "
+ "must not be called inside a transaction")));
+ }
+ else if (snapshot_action == CRS_USE_SNAPSHOT)
+ {
+ if (!IsTransactionBlock())
+ ereport(ERROR,
+ (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT "
+ "must be called inside a transaction")));
+
+ if (XactIsoLevel != XACT_REPEATABLE_READ)
+ ereport(ERROR,
+ (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT "
+ "must be called in REPEATABLE READ isolation mode transaction")));
+
+ if (FirstSnapshotSet)
+ ereport(ERROR,
+ (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT "
+ "must be called before any query")));
+
+ if (IsSubTransaction())
+ ereport(ERROR,
+ (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT "
+ "must not be called in a subtransaction")));
+ }
+
ctx = CreateInitDecodingContext(cmd->plugin, NIL,
logical_read_xlog_page,
WalSndPrepareWrite, WalSndWriteData);
DecodingContextFindStartpoint(ctx);
/*
- * Export the snapshot if we've been asked to do so.
+ * Export or use the snapshot if we've been asked to do so.
*
* NB. We will convert the snapbuild.c kind of snapshot to normal
* snapshot when doing this.
*/
- if (export_snapshot)
+ if (snapshot_action == CRS_EXPORT_SNAPSHOT)
+ {
snapshot_name = SnapBuildExportSnapshot(ctx->snapshot_builder);
+ }
+ else if (snapshot_action == CRS_USE_SNAPSHOT)
+ {
+ Snapshot snap;
+
+ snap = SnapBuildInitalSnapshot(ctx->snapshot_builder);
+ RestoreTransactionSnapshot(snap, MyProc);
+ }
/* don't need the decoding context anymore */
FreeDecodingContext(ctx);
/*
* Execute an incoming replication command.
+ *
+ * Returns true if the cmd_string was recognized as WalSender command, false
+ * if not.
*/
-void
+bool
exec_replication_command(const char *cmd_string)
{
int parse_rc;
cmd_node = replication_parse_result;
+ /*
+ * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot. If it was
+ * called outside of transaction the snapshot should be cleared here.
+ */
+ if (!IsTransactionBlock())
+ SnapBuildClearExportedSnapshot();
+
+ /*
+ * For aborted transactions, don't allow anything except pure SQL,
+ * the exec_simple_query() will handle it correctly.
+ */
+ if (IsAbortedTransactionBlockState() && !IsA(cmd_node, SQLCmd))
+ ereport(ERROR,
+ (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
+ errmsg("current transaction is aborted, "
+ "commands ignored until end of transaction block")));
+
+ CHECK_FOR_INTERRUPTS();
+
/*
* Allocate buffers that will be used for each outgoing and incoming
* message. We do this just once per command to reduce palloc overhead.
break;
case T_BaseBackupCmd:
+ PreventTransactionChain(true, "BASE_BACKUP");
SendBaseBackup((BaseBackupCmd *) cmd_node);
break;
{
StartReplicationCmd *cmd = (StartReplicationCmd *) cmd_node;
+ PreventTransactionChain(true, "START_REPLICATION");
+
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
StartReplication(cmd);
else
}
case T_TimeLineHistoryCmd:
+ PreventTransactionChain(true, "TIMELINE_HISTORY");
SendTimeLineHistory((TimeLineHistoryCmd *) cmd_node);
break;
}
break;
+ case T_SQLCmd:
+ if (MyDatabaseId == InvalidOid)
+ ereport(ERROR,
+ (errmsg("not connected to database")));
+
+ /* Tell the caller that this wasn't a WalSender command. */
+ return false;
+
default:
elog(ERROR, "unrecognized replication command node tag: %u",
cmd_node->type);
/* Send CommandComplete message */
EndCommand("SELECT", DestRemote);
+
+ return true;
}
/*
pq_getmsgend(&input_message);
if (am_walsender)
- exec_replication_command(query_string);
+ {
+ if (!exec_replication_command(query_string))
+ exec_simple_query(query_string);
+ }
else
exec_simple_query(query_string);
{
return pg_current_logfile(fcinfo);
}
+
+/*
+ * pg_get_replica_identity_index
+ *		SQL wrapper around RelationGetReplicaIndex().
+ *
+ * Takes a relation OID as its only argument and returns the OID reported by
+ * RelationGetReplicaIndex() for that relation, or SQL NULL when that OID is
+ * not valid.
+ */
+Datum
+pg_get_replica_identity_index(PG_FUNCTION_ARGS)
+{
+	Oid			relationId = PG_GETARG_OID(0);
+	Relation	relation;
+	Oid			indexId;
+
+	/* The relation is opened only for the duration of the lookup. */
+	relation = heap_open(relationId, AccessShareLock);
+	indexId = RelationGetReplicaIndex(relation);
+	heap_close(relation, AccessShareLock);
+
+	if (!OidIsValid(indexId))
+		PG_RETURN_NULL();
+
+	PG_RETURN_OID(indexId);
+}
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
64
},
{PublicationRelRelationId, /* PUBLICATIONRELMAP */
- PublicationRelMapIndexId,
+ PublicationRelPrrelidPrpubidIndexId,
2,
{
Anum_pg_publication_rel_prrelid,
},
4
},
+ {SubscriptionRelRelationId, /* SUBSCRIPTIONRELMAP */
+ SubscriptionRelSrrelidSrsubidIndexId,
+ 2,
+ {
+ Anum_pg_subscription_rel_srrelid,
+ Anum_pg_subscription_rel_srsubid,
+ 0,
+ 0
+ },
+ 64
+ },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
NULL, NULL, NULL
},
+ {
+ {"max_sync_workers_per_subscription",
+ PGC_SIGHUP,
+ RESOURCES_ASYNCHRONOUS,
+ gettext_noop("Maximum number of table synchronization workers per subscription."),
+ NULL,
+ },
+ &max_sync_workers_per_subscription,
+ 2, 0, MAX_BACKENDS,
+ NULL, NULL, NULL
+ },
+
{
{"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE,
gettext_noop("Automatic log file rotation will occur after N minutes."),
int use_setsessauth;
int enable_row_security;
int include_subscriptions;
- int no_create_subscription_slots;
+ int no_subscription_connect;
/* default, if no "inclusion" switches appear, is to dump everything */
bool include_everything;
{"snapshot", required_argument, NULL, 6},
{"strict-names", no_argument, &strict_names, 1},
{"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
- {"no-create-subscription-slots", no_argument, &dopt.no_create_subscription_slots, 1},
{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
+ {"no-subscription-connect", no_argument, &dopt.no_subscription_connect, 1},
{"no-synchronized-snapshots", no_argument, &dopt.no_synchronized_snapshots, 1},
{"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1},
{"no-sync", no_argument, NULL, 7},
printf(_(" --if-exists use IF EXISTS when dropping objects\n"));
printf(_(" --include-subscriptions dump logical replication subscriptions\n"));
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
- printf(_(" --no-create-subscription-slots\n"
- " do not create replication slots for subscriptions\n"));
printf(_(" --no-security-labels do not dump security label assignments\n"));
+ printf(_(" --no-subscription-connect dump subscriptions so they don't connect on restore\n"));
printf(_(" --no-synchronized-snapshots do not use synchronized snapshots in parallel jobs\n"));
printf(_(" --no-tablespaces do not dump tablespace assignments\n"));
printf(_(" --no-unlogged-table-data do not dump unlogged table data\n"));
appendPQExpBufferStr(query, ", SLOT NAME = ");
appendStringLiteralAH(query, subinfo->subslotname, fout);
- if (dopt->no_create_subscription_slots)
- appendPQExpBufferStr(query, ", NOCREATE SLOT");
+ if (dopt->no_subscription_connect)
+ appendPQExpBufferStr(query, ", NOCONNECT");
appendPQExpBufferStr(query, ");\n");
create_order => 50,
create_sql => 'CREATE SUBSCRIPTION sub1
CONNECTION \'dbname=doesnotexist\' PUBLICATION pub1
- WITH (DISABLED, NOCREATE SLOT);',
+ WITH (DISABLED, NOCONNECT);',
regexp => qr/^
\QCREATE SUBSCRIPTION sub1 CONNECTION 'dbname=doesnotexist' PUBLICATION pub1 WITH (DISABLED, SLOT NAME = 'sub1');\E
/xm,
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201703221
+#define CATALOG_VERSION_NO 201703231
#endif
DECLARE_UNIQUE_INDEX(pg_publication_rel_oid_index, 6112, on pg_publication_rel using btree(oid oid_ops));
#define PublicationRelObjectIndexId 6112
-DECLARE_UNIQUE_INDEX(pg_publication_rel_map_index, 6113, on pg_publication_rel using btree(prrelid oid_ops, prpubid oid_ops));
-#define PublicationRelMapIndexId 6113
+DECLARE_UNIQUE_INDEX(pg_publication_rel_prrelid_prpubid_index, 6113, on pg_publication_rel using btree(prrelid oid_ops, prpubid oid_ops));
+#define PublicationRelPrrelidPrpubidIndexId 6113
DECLARE_UNIQUE_INDEX(pg_subscription_oid_index, 6114, on pg_subscription using btree(oid oid_ops));
#define SubscriptionObjectIndexId 6114
DECLARE_UNIQUE_INDEX(pg_subscription_subname_index, 6115, on pg_subscription using btree(subdbid oid_ops, subname name_ops));
#define SubscriptionNameIndexId 6115
+DECLARE_UNIQUE_INDEX(pg_subscription_rel_srrelid_srsubid_index, 6117, on pg_subscription_rel using btree(srrelid oid_ops, srsubid oid_ops));
+#define SubscriptionRelSrrelidSrsubidIndexId 6117
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
DATA(insert OID = 3843 ( pg_column_is_updatable PGNSP PGUID 12 10 0 0 0 f f f f t f s s 3 0 16 "2205 21 16" _null_ _null_ _null_ _null_ _null_ pg_column_is_updatable _null_ _null_ _null_ ));
DESCR("is a column updatable");
+DATA(insert OID = 6120 ( pg_get_replica_identity_index PGNSP PGUID 12 10 0 0 0 f f f f t f s s 1 0 2205 "2205" _null_ _null_ _null_ _null_ _null_ pg_get_replica_identity_index _null_ _null_ _null_ ));
+DESCR("oid of replica identity index if any");
+
/* Deferrable unique constraint trigger */
DATA(insert OID = 1250 ( unique_key_recheck PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ unique_key_recheck _null_ _null_ _null_ ));
DESCR("deferred UNIQUE constraint check");
DESCR("statistics: information about currently active replication");
DATA(insert OID = 3317 ( pg_stat_get_wal_receiver PGNSP PGUID 12 1 0 0 0 f f f f f f s r 0 0 2249 "" "{23,25,3220,23,3220,23,1184,1184,3220,1184,25,25}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,conninfo}" _null_ _null_ pg_stat_get_wal_receiver _null_ _null_ _null_ ));
DESCR("statistics: information about WAL receiver");
-DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o}" "{subid,subid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ _null_ _null_ ));
+DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o,o}" "{subid,subid,relid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ _null_ _null_ ));
DESCR("statistics: information about subscription");
DATA(insert OID = 2026 ( pg_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 0 0 23 "" _null_ _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ ));
DESCR("statistics: current backend PID");
--- /dev/null
+/* -------------------------------------------------------------------------
+ *
+ * pg_subscription_rel.h
+ * Local info about tables that come from the publisher of a
+ * subscription (pg_subscription_rel).
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef PG_SUBSCRIPTION_REL_H
+#define PG_SUBSCRIPTION_REL_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_subscription_rel definition. cpp turns this into
+ * typedef struct FormData_pg_subscription_rel
+ * ----------------
+ */
+#define SubscriptionRelRelationId 6102
+
+/* Workaround for genbki not knowing about XLogRecPtr */
+#define pg_lsn XLogRecPtr
+
+CATALOG(pg_subscription_rel,6102) BKI_WITHOUT_OIDS
+{
+ Oid srsubid; /* Oid of subscription */
+ Oid srrelid; /* Oid of relation */
+ char srsubstate; /* state of the relation in subscription */
+ pg_lsn srsublsn; /* remote lsn of the state change
+ * used for synchronization coordination */
+} FormData_pg_subscription_rel;
+
+typedef FormData_pg_subscription_rel *Form_pg_subscription_rel;
+
+/* ----------------
+ * compiler constants for pg_subscription_rel
+ * ----------------
+ */
+#define Natts_pg_subscription_rel 4
+#define Anum_pg_subscription_rel_srsubid 1
+#define Anum_pg_subscription_rel_srrelid 2
+#define Anum_pg_subscription_rel_srsubstate 3
+#define Anum_pg_subscription_rel_srsublsn 4
+
+/* ----------------
+ * substate constants
+ * ----------------
+ */
+#define SUBREL_STATE_INIT 'i' /* initializing (sublsn NULL) */
+#define SUBREL_STATE_DATASYNC 'd' /* data is being synchronized (sublsn NULL) */
+#define SUBREL_STATE_SYNCDONE 's' /* synchronization finished in front of apply (sublsn set) */
+#define SUBREL_STATE_READY 'r' /* ready (sublsn set) */
+
+/* These are never stored in the catalog, we only use them for IPC. */
+#define SUBREL_STATE_UNKNOWN '\0' /* unknown state */
+#define SUBREL_STATE_SYNCWAIT 'w' /* waiting for sync */
+#define SUBREL_STATE_CATCHUP 'c' /* catching up with apply */
+
+typedef struct SubscriptionRelState
+{
+ Oid relid; /* OID of a relation; presumably mirrors srrelid -- TODO confirm */
+ XLogRecPtr lsn; /* presumably mirrors srsublsn -- TODO confirm */
+ char state; /* presumably one of the SUBREL_STATE_* codes -- TODO confirm */
+} SubscriptionRelState;
+
+extern Oid SetSubscriptionRelState(Oid subid, Oid relid, char state,
+ XLogRecPtr sublsn);
+extern char GetSubscriptionRelState(Oid subid, Oid relid,
+ XLogRecPtr *sublsn, bool missing_ok);
+extern void RemoveSubscriptionRel(Oid subid, Oid relid);
+
+extern List *GetSubscriptionRelations(Oid subid);
+extern List *GetSubscriptionNotReadyRelations(Oid subid);
+
+#endif /* PG_SUBSCRIPTION_REL_H */
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
+typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread);
extern void DoCopy(ParseState *state, const CopyStmt *stmt,
int stmt_location, int stmt_len,
extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options);
extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename,
- bool is_program, List *attnamelist, List *options);
+ bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options);
extern void EndCopyFrom(CopyState cstate);
extern bool NextCopyFrom(CopyState cstate, ExprContext *econtext,
Datum *values, bool *nulls, Oid *tupleOid);
char ***fields, int *nfields);
extern void CopyFromErrorCallback(void *arg);
+extern uint64 CopyFrom(CopyState cstate);
+
extern DestReceiver *CreateCopyDestReceiver(void);
#endif /* COPY_H */
T_DropReplicationSlotCmd,
T_StartReplicationCmd,
T_TimeLineHistoryCmd,
+ T_SQLCmd,
/*
* TAGS FOR RANDOM OTHER STUFF
List *options; /* List of DefElem nodes */
} CreateSubscriptionStmt;
+typedef enum AlterSubscriptionType
+{
+ ALTER_SUBSCRIPTION_OPTIONS,
+ ALTER_SUBSCRIPTION_CONNECTION,
+ ALTER_SUBSCRIPTION_PUBLICATION,
+ ALTER_SUBSCRIPTION_PUBLICATION_REFRESH,
+ ALTER_SUBSCRIPTION_REFRESH,
+ ALTER_SUBSCRIPTION_ENABLED
+} AlterSubscriptionType; /* type of the "kind" field of AlterSubscriptionStmt */
+
typedef struct AlterSubscriptionStmt
{
NodeTag type;
+ AlterSubscriptionType kind; /* ALTER_SUBSCRIPTION_OPTIONS, etc */
char *subname; /* Name of of the subscription */
+ char *conninfo; /* Connection string to publisher */
+ List *publication; /* One or more publication to subscribe to */
List *options; /* List of DefElem nodes */
} AlterSubscriptionStmt;
TimeLineID timeline;
} TimeLineHistoryCmd;
+/* ----------------------
+ * SQL commands
+ *
+ * Node built by the replication grammar's sql_cmd rule, i.e. for input that
+ * begins with an identifier instead of a replication keyword.  It has no
+ * fields besides the node tag; the command text is not kept in the node.
+ * ----------------------
+ */
+typedef struct SQLCmd
+{
+ NodeTag type;
+} SQLCmd;
+
#endif /* REPLNODES_H */
PG_KEYWORD("next", NEXT, UNRESERVED_KEYWORD)
PG_KEYWORD("no", NO, UNRESERVED_KEYWORD)
PG_KEYWORD("none", NONE, COL_NAME_KEYWORD)
+PG_KEYWORD("norefresh", NOREFRESH, UNRESERVED_KEYWORD)
PG_KEYWORD("not", NOT, RESERVED_KEYWORD)
PG_KEYWORD("nothing", NOTHING, UNRESERVED_KEYWORD)
PG_KEYWORD("notify", NOTIFY, UNRESERVED_KEYWORD)
WAIT_EVENT_PARALLEL_FINISH,
WAIT_EVENT_PARALLEL_BITMAP_SCAN,
WAIT_EVENT_SAFE_SNAPSHOT,
- WAIT_EVENT_SYNC_REP
+ WAIT_EVENT_SYNC_REP,
+ WAIT_EVENT_LOGICAL_SYNC_DATA,
+ WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE
} WaitEventIPC;
/* ----------
/* memory context this is all allocated in */
MemoryContext context;
- /* infrastructure pieces */
- XLogReaderState *reader;
+ /* The associated replication slot */
ReplicationSlot *slot;
+
+ /* infrastructure pieces for decoding */
+ XLogReaderState *reader;
struct ReorderBuffer *reorder;
struct SnapBuild *snapshot_builder;
TransactionId write_xid;
} LogicalDecodingContext;
+
extern void CheckLogicalDecodingRequirements(void);
extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin,
extern bool DecodingContextReady(LogicalDecodingContext *ctx);
extern void FreeDecodingContext(LogicalDecodingContext *ctx);
+extern LogicalDecodingContext *CreateCopyDecodingContext(
+ List *output_plugin_options,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write);
+extern List *DecodingContextGetTableList(LogicalDecodingContext *ctx);
+
extern void LogicalIncreaseXminForSlot(XLogRecPtr lsn, TransactionId xmin);
extern void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,
XLogRecPtr restart_lsn);
#define LOGICALLAUNCHER_H
extern int max_logical_replication_workers;
+extern int max_sync_workers_per_subscription;
extern void ApplyLauncherRegister(void);
extern void ApplyLauncherMain(Datum main_arg);
extern void SnapBuildSnapDecRefcount(Snapshot snap);
+extern Snapshot SnapBuildInitalSnapshot(SnapBuild *builder);
extern const char *SnapBuildExportSnapshot(SnapBuild *snapstate);
extern void SnapBuildClearExportedSnapshot(void);
#include "access/xlog.h"
#include "access/xlogdefs.h"
#include "fmgr.h"
+#include "replication/logicalproto.h"
+#include "replication/walsender.h"
#include "storage/latch.h"
#include "storage/spin.h"
#include "pgtime.h"
+#include "utils/tuplestore.h"
/* user-settable parameters */
extern int wal_receiver_status_interval;
struct WalReceiverConn;
typedef struct WalReceiverConn WalReceiverConn;
+/*
+ * Status of walreceiver query execution.
+ *
+ * We only define statuses that are currently used.
+ *
+ * Note: no comma after the last enumerator; a trailing comma in an
+ * enumerator list is not accepted by C89 compilers.
+ */
+typedef enum
+{
+ WALRCV_ERROR, /* There was an error when executing the query. */
+ WALRCV_OK_COMMAND, /* Query executed utility or replication command. */
+ WALRCV_OK_TUPLES, /* Query returned tuples. */
+ WALRCV_OK_COPY_IN, /* Query started COPY FROM. */
+ WALRCV_OK_COPY_OUT, /* Query started COPY TO. */
+ WALRCV_OK_COPY_BOTH /* Query started COPY BOTH replication protocol. */
+} WalRcvExecStatus;
+
+/*
+ * Return value for walrcv_exec, returns the status of the execution and
+ * tuples if any.
+ *
+ * walrcv_clear_result() releases the structure and every member that is
+ * set, so members that are not used should be left NULL.
+ */
+typedef struct WalRcvExecResult
+{
+ WalRcvExecStatus status; /* outcome of the execution */
+ char *err; /* presumably the error text for WALRCV_ERROR -- TODO confirm */
+ Tuplestorestate *tuplestore; /* returned tuples, if any */
+ TupleDesc tupledesc; /* presumably describes rows in tuplestore -- TODO confirm */
+} WalRcvExecResult;
+
/* libpqwalreceiver hooks */
typedef WalReceiverConn *(*walrcv_connect_fn) (const char *conninfo, bool logical,
const char *appname,
int nbytes);
typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn,
const char *slotname, bool temporary,
- bool export_snapshot, XLogRecPtr *lsn);
-typedef bool (*walrcv_command_fn) (WalReceiverConn *conn, const char *cmd,
- char **err);
+ CRSSnapshotAction snapshot_action,
+ XLogRecPtr *lsn);
+typedef WalRcvExecResult *(*walrcv_exec_fn) (WalReceiverConn *conn,
+ const char *query,
+ const int nRetTypes,
+ const Oid *retTypes);
typedef void (*walrcv_disconnect_fn) (WalReceiverConn *conn);
typedef struct WalReceiverFunctionsType
walrcv_receive_fn walrcv_receive;
walrcv_send_fn walrcv_send;
walrcv_create_slot_fn walrcv_create_slot;
- walrcv_command_fn walrcv_command;
+ walrcv_exec_fn walrcv_exec;
walrcv_disconnect_fn walrcv_disconnect;
} WalReceiverFunctionsType;
WalReceiverFunctions->walrcv_receive(conn, buffer, wait_fd)
#define walrcv_send(conn, buffer, nbytes) \
WalReceiverFunctions->walrcv_send(conn, buffer, nbytes)
-#define walrcv_create_slot(conn, slotname, temporary, export_snapshot, lsn) \
- WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, export_snapshot, lsn)
-#define walrcv_command(conn, cmd, err) \
- WalReceiverFunctions->walrcv_command(conn, cmd, err)
+#define walrcv_create_slot(conn, slotname, temporary, snapshot_action, lsn) \
+ WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, snapshot_action, lsn)
+#define walrcv_exec(conn, exec, nRetTypes, retTypes) \
+ WalReceiverFunctions->walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn) \
WalReceiverFunctions->walrcv_disconnect(conn)
+/*
+ * walrcv_clear_result
+ *		Release a WalRcvExecResult together with whatever it carries.
+ *
+ * A NULL argument is accepted and ignored.  Members that are NULL are
+ * skipped; the structure itself is freed last.
+ */
+static inline void
+walrcv_clear_result(WalRcvExecResult *walres)
+{
+	if (walres != NULL)
+	{
+		if (walres->err != NULL)
+			pfree(walres->err);
+
+		if (walres->tuplestore != NULL)
+			tuplestore_end(walres->tuplestore);
+
+		if (walres->tupledesc != NULL)
+			FreeTupleDesc(walres->tupledesc);
+
+		pfree(walres);
+	}
+}
+
/* prototypes for functions in walreceiver.c */
extern void WalReceiverMain(void) pg_attribute_noreturn();
#include "fmgr.h"
+/*
+ * What to do with a snapshot in create replication slot command.
+ *
+ * CreateReplicationSlot() starts from CRS_EXPORT_SNAPSHOT and lets the
+ * command's options override it.
+ */
+typedef enum
+{
+ CRS_EXPORT_SNAPSHOT, /* export it via SnapBuildExportSnapshot() */
+ CRS_NOEXPORT_SNAPSHOT, /* neither export nor use the snapshot */
+ CRS_USE_SNAPSHOT /* install it with RestoreTransactionSnapshot() */
+} CRSSnapshotAction;
+
/* global state */
extern bool am_walsender;
extern bool am_cascading_walsender;
extern bool log_replication_commands;
extern void InitWalSender(void);
-extern void exec_replication_command(const char *query_string);
+extern bool exec_replication_command(const char *query_string);
extern void WalSndErrorCleanup(void);
extern void WalSndSignals(void);
extern Size WalSndShmemSize(void);
/* Used for initial table synchronization. */
Oid relid;
+ char relstate;
+ XLogRecPtr relstate_lsn;
+ slock_t relmutex;
/* Stats. */
XLogRecPtr last_lsn;
TimestampTz reply_time;
} LogicalRepWorker;
+/*
+ * Memory context for cached variables in apply worker.
+ *
+ * This is a declaration only.  Without "extern" every file that includes
+ * this header would carry its own tentative definition of the variable,
+ * which fails to link on toolchains that do not merge common symbols.
+ * NOTE(review): the single real definition is expected in a .c file that is
+ * not visible from here -- confirm it exists and is not static.
+ */
+extern MemoryContext ApplyCacheContext;
+
/* libpqreceiver connection */
extern struct WalReceiverConn *wrconn;
extern bool got_SIGTERM;
extern void logicalrep_worker_attach(int slot);
-extern LogicalRepWorker *logicalrep_worker_find(Oid subid);
-extern int logicalrep_worker_count(Oid subid);
-extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid);
-extern void logicalrep_worker_stop(Oid subid);
-extern void logicalrep_worker_wakeup(Oid subid);
+extern LogicalRepWorker *logicalrep_worker_find(Oid subid, Oid relid,
+ bool only_running);
+extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname,
+ Oid userid, Oid relid);
+extern void logicalrep_worker_stop(Oid subid, Oid relid);
+extern void logicalrep_worker_wakeup(Oid subid, Oid relid);
+extern void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker);
+
+extern int logicalrep_sync_worker_count(Oid subid);
extern void logicalrep_worker_sigterm(SIGNAL_ARGS);
+extern char *LogicalRepSyncTableStart(XLogRecPtr *origin_startpos);
+/* Declared "extern" like the other prototypes in this header. */
+extern void process_syncing_tables(XLogRecPtr current_lsn);
+extern void invalidate_syncing_table_states(Datum arg, int cacheid,
+ uint32 hashvalue);
+
+/*
+ * am_tablesync_worker
+ *		Report whether this worker has a valid relation OID in its
+ *		MyLogicalRepWorker slot.
+ */
+static inline bool
+am_tablesync_worker(void)
+{
+	Oid			syncrelid = MyLogicalRepWorker->relid;
+
+	return OidIsValid(syncrelid);
+}
#endif /* WORKER_INTERNAL_H */
STATRELATTINH,
SUBSCRIPTIONOID,
SUBSCRIPTIONNAME,
+ SUBSCRIPTIONRELMAP,
TABLESPACEOID,
TRFOID,
TRFTYPELANG,
FROM SQL WITH FUNCTION varchar_transform(internal),
TO SQL WITH FUNCTION int4recv(internal));
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
-CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT);
+CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
+WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
ERROR: unrecognized object type "stone"
pg_stat_subscription| SELECT su.oid AS subid,
su.subname,
st.pid,
+ st.relid,
st.received_lsn,
st.last_msg_send_time,
st.last_msg_receipt_time,
st.latest_end_lsn,
st.latest_end_time
FROM (pg_subscription su
- LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid)));
+ LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, relid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid)));
pg_stat_sys_indexes| SELECT pg_stat_all_indexes.relid,
pg_stat_all_indexes.indexrelid,
pg_stat_all_indexes.schemaname,
pg_shseclabel|t
pg_statistic|t
pg_subscription|t
+pg_subscription_rel|t
pg_tablespace|t
pg_transform|t
pg_trigger|t
ERROR: syntax error at or near "PUBLICATION"
LINE 1: CREATE SUBSCRIPTION testsub PUBLICATION foo;
^
-set client_min_messages to error;
-- fail - cannot do CREATE SUBSCRIPTION CREATE SLOT inside transaction block
BEGIN;
CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub WITH (CREATE SLOT);
CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub;
ERROR: invalid connection string syntax: missing "=" after "testconn" in connection info string
-CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT);
-reset client_min_messages;
+CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (NOCONNECT);
+WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
\dRs+
List of subscriptions
Name | Owner | Enabled | Publication | Conninfo
testsub | regress_subscription_user | f | {testpub} | dbname=doesnotexist
(1 row)
-ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3;
-\dRs
- List of subscriptions
- Name | Owner | Enabled | Publication
----------+---------------------------+---------+---------------------
- testsub | regress_subscription_user | f | {testpub2,testpub3}
-(1 row)
-
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3 NOREFRESH;
ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2';
-ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1;
\dRs+
List of subscriptions
- Name | Owner | Enabled | Publication | Conninfo
----------+---------------------------+---------+--------------------+----------------------
- testsub | regress_subscription_user | f | {testpub,testpub1} | dbname=doesnotexist2
+ Name | Owner | Enabled | Publication | Conninfo
+---------+---------------------------+---------+---------------------+----------------------
+ testsub | regress_subscription_user | f | {testpub2,testpub3} | dbname=doesnotexist2
(1 row)
BEGIN;
ALTER SUBSCRIPTION testsub ENABLE;
\dRs
- List of subscriptions
- Name | Owner | Enabled | Publication
----------+---------------------------+---------+--------------------
- testsub | regress_subscription_user | t | {testpub,testpub1}
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+---------+---------------------------+---------+---------------------
+ testsub | regress_subscription_user | t | {testpub2,testpub3}
(1 row)
ALTER SUBSCRIPTION testsub DISABLE;
\dRs
- List of subscriptions
- Name | Owner | Enabled | Publication
----------+---------------------------+---------+--------------------
- testsub | regress_subscription_user | f | {testpub,testpub1}
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+---------+---------------------------+---------+---------------------
+ testsub | regress_subscription_user | f | {testpub2,testpub3}
(1 row)
COMMIT;
RESET ROLE;
ALTER SUBSCRIPTION testsub RENAME TO testsub_foo;
\dRs
- List of subscriptions
- Name | Owner | Enabled | Publication
--------------+---------------------------+---------+--------------------
- testsub_foo | regress_subscription_user | f | {testpub,testpub1}
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+-------------+---------------------------+---------+---------------------
+ testsub_foo | regress_subscription_user | f | {testpub2,testpub3}
(1 row)
-- rename back to keep the rest simple
FROM SQL WITH FUNCTION varchar_transform(internal),
TO SQL WITH FUNCTION int4recv(internal));
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
-CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT);
+CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
-- fail - no connection
CREATE SUBSCRIPTION testsub PUBLICATION foo;
-set client_min_messages to error;
-- fail - cannot do CREATE SUBSCRIPTION CREATE SLOT inside transaction block
BEGIN;
CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub WITH (CREATE SLOT);
COMMIT;
CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub;
-CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT);
-reset client_min_messages;
-\dRs+
-
-ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3;
+CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (NOCONNECT);
-\dRs
+\dRs+
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3 NOREFRESH;
ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2';
-ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1;
\dRs+
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 11;
+use Test::More tests => 14;
# Initialize publisher node
my $node_publisher = get_new_node('publisher');
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab_notrep AS SELECT generate_series(1,10) AS a");
$node_publisher->safe_psql('postgres',
- "CREATE TABLE tab_ins (a int)");
+ "CREATE TABLE tab_ins AS SELECT generate_series(1,1002) AS a");
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab_full AS SELECT generate_series(1,10) AS a");
$node_publisher->safe_psql('postgres',
$node_publisher->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for subscriber to catch up";
+# Also wait for initial table sync to finish
+my $synced_query =
+"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
my $result =
$node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_notrep");
is($result, qq(0), 'check non-replicated table is empty on subscriber');
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_ins");
+is($result, qq(1002), 'check initial data was copied to subscriber');
+
$node_publisher->safe_psql('postgres',
"INSERT INTO tab_ins SELECT generate_series(1,50)");
$node_publisher->safe_psql('postgres',
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
-is($result, qq(50|1|50), 'check replicated inserts on subscriber');
+is($result, qq(1052|1|1002), 'check replicated inserts on subscriber');
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep");
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full");
-is($result, qq(10|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples');
+is($result, qq(20|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples');
# check that change of connection string and/or publication list causes
# restart of subscription workers. Not all of these are registered as tests
$oldpid = $node_publisher->safe_psql('postgres',
"SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';");
$node_subscriber->safe_psql('postgres',
- "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only");
+ "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only REFRESH WITH (NOCOPY DATA)");
$node_publisher->poll_query_until('postgres',
"SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';")
or die "Timed out while waiting for apply to restart";
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
-is($result, qq(150|1|1100), 'check replicated inserts after subscription publication change');
+is($result, qq(1152|1|1100), 'check replicated inserts after subscription publication change');
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep");
"ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_full");
$node_publisher->safe_psql('postgres',
"DELETE FROM tab_ins WHERE a > 0");
+$node_subscriber->safe_psql('postgres',
+ "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION WITH (NOCOPY DATA)");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab_full VALUES(0)");
# note that data are different on provider and subscriber
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
-is($result, qq(50|1|50), 'check replicated deletes after alter publication');
+is($result, qq(1052|1|1002), 'check replicated deletes after alter publication');
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full");
-is($result, qq(11|0|100), 'check replicated insert after alter publication');
+is($result, qq(21|0|100), 'check replicated insert after alter publication');
# check restart on rename
$oldpid = $node_publisher->safe_psql('postgres',
$node_publisher->safe_psql('postgres', "SELECT count(*) FROM pg_replication_slots");
is($result, qq(0), 'check replication slot was dropped on publisher');
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription_rel");
+is($result, qq(0), 'check subscription relation status was dropped on subscriber');
+
+$result =
+ $node_publisher->safe_psql('postgres', "SELECT count(*) FROM pg_replication_slots");
+is($result, qq(0), 'check replication slot was dropped on publisher');
+
$result =
$node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_replication_origin");
is($result, qq(0), 'check replication origin was dropped on subscriber');
$node_publisher->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for subscriber to catch up";
+# Wait for initial sync to finish as well
+my $synced_query =
+"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
# Insert initial test data
$node_publisher->safe_psql('postgres', qq(
-- test_tbl_one_array_col
my $appname = 'tap_sub';
$node_subscriber->safe_psql('postgres',
- "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub;");
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (NOCOPY DATA)");
# Wait for subscriber to finish initialization
my $caughtup_query =
--- /dev/null
+# Tests for logical replication table syncing
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 7;
+
+# Initialize publisher node
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+# Create subscriber node
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+# Create some preexisting content on publisher
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_rep (a int primary key)");
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_rep SELECT generate_series(1,10)");
+
+# Setup structure on subscriber
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_rep (a int primary key)");
+
+# Setup logical replication
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub FOR ALL TABLES");
+
+my $appname = 'tap_sub';
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub");
+
+# Wait for subscriber to finish initialization
+my $caughtup_query =
+"SELECT pg_current_wal_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';";
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# Also wait for initial table sync to finish
+my $synced_query =
+"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+my $result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep");
+is($result, qq(10), 'initial data synced for first sub');
+
+# drop subscription so that there is unreplicated data
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
+
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_rep SELECT generate_series(11,20)");
+
+# recreate the subscription, it will try to do initial copy
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub");
+
+# but it will be stuck on data copy as it will fail on constraint
+my $started_query =
+"SELECT srsubstate = 'd' FROM pg_subscription_rel;";
+$node_subscriber->poll_query_until('postgres', $started_query)
+ or die "Timed out while waiting for subscriber to start sync";
+
+# remove the conflicting data
+$node_subscriber->safe_psql('postgres',
+ "DELETE FROM tab_rep;");
+
+# wait for sync to finish this time
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+# check that all data is synced
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep");
+is($result, qq(20), 'initial data synced for second sub');
+
+# now check another subscription for the same node pair
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub2 CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (NOCOPY DATA)");
+
+# wait for it to start
+$node_subscriber->poll_query_until('postgres', "SELECT pid IS NOT NULL FROM pg_stat_subscription WHERE subname = 'tap_sub2' AND relid IS NULL")
+ or die "Timed out while waiting for subscriber to start";
+
+# and drop both subscriptions
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub2");
+
+# check subscriptions are removed
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription");
+is($result, qq(0), 'second and third sub are dropped');
+
+# remove the conflicting data
+$node_subscriber->safe_psql('postgres',
+ "DELETE FROM tab_rep;");
+
+# recreate the subscription again
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub");
+
+# and wait for data sync to finish again
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+# check that all data is synced
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep");
+is($result, qq(20), 'initial data synced for fourth sub');
+
+# add new table on subscriber
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_rep_next (a int)");
+
+# setup structure with existing data on publisher
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_rep_next (a) AS SELECT generate_series(1,10)");
+
+# Wait for subscription to catch up
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next");
+is($result, qq(0), 'no data for table added after subscription initialized');
+
+# ask for data sync
+$node_subscriber->safe_psql('postgres',
+ "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION");
+
+# wait for sync to finish
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next");
+is($result, qq(10), 'data for table added after subscription initialized are now synced');
+
+# Add some data
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_rep_next SELECT generate_series(1,10)");
+
+# Wait for subscription to catch up
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next");
+is($result, qq(20), 'changes for table added after subscription initialized replicated');
+
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
+
+$node_subscriber->stop('fast');
+$node_publisher->stop('fast');