Skip to content

Commit 08208ba

Browse files
wadlejitendradanishedbmnenciaNiccoloFeijsilvela
authored
test: e2e tests for replication slots (cloudnative-pg#760)
Refine replication slots tests. Test failover/switchover/scaling with replication slots on or off. Signed-off-by: Jitendra Wadle <jitendra.wadle@enterprisedb.com> Signed-off-by: Danish Khan <danish.khan@enterprisedb.com> Signed-off-by: Niccolò Fei <niccolo.fei@enterprisedb.com> Signed-off-by: Jaime Silvela <jaime.silvela@enterprisedb.com> Signed-off-by: Marco Nenciarini <marco.nenciarini@enterprisedb.com> Co-authored-by: Danish Khan <danish.khan@enterprisedb.com> Co-authored-by: Marco Nenciarini <marco.nenciarini@enterprisedb.com> Co-authored-by: Niccolò Fei <niccolo.fei@enterprisedb.com> Co-authored-by: Jaime Silvela <jaime.silvela@enterprisedb.com>
1 parent 7b47d7e commit 08208ba

13 files changed

+638
-271
lines changed

docs/src/e2e.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,4 @@ and the following suite of E2E tests are performed on that cluster:
5757
* Physical replica clusters;
5858
* Storage expansion;
5959
* Data corruption;
60+
* Replication Slots;

tests/e2e/asserts_test.go

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2493,70 +2493,114 @@ func AssertPvcHasLabels(
24932493
})
24942494
}
24952495

2496-
// AssertRepSlotsOnPod checks if all the required replication slot exists in a given pod,
2496+
// AssertReplicationSlotsOnPod checks that all the required replication slots exist in a given pod,
24972497
// and that obsolete slots are correctly deleted (post management operations).
2498-
// In case we are targeting the primary, it will also check if the slot is active.
2499-
func AssertRepSlotsOnPod(
2498+
// In the primary, it will also check if the slots are active.
2499+
func AssertReplicationSlotsOnPod(
25002500
namespace,
25012501
clusterName string,
25022502
pod corev1.Pod,
25032503
) {
2504-
expectedSlots, err := testsUtils.GetExpectedRepSlotsOnPod(namespace, clusterName, pod.GetName(), env)
2504+
// Replication slot high availability requires PostgreSQL 11 or above
2505+
if env.PostgresVersion == 10 {
2506+
Skip("Ignoring replication slots verification for postgres 10")
2507+
return
2508+
}
2509+
2510+
expectedSlots, err := testsUtils.GetExpectedReplicationSlotsOnPod(namespace, clusterName, pod.GetName(), env)
25052511
Expect(err).ToNot(HaveOccurred())
25062512

25072513
Eventually(func() ([]string, error) {
2508-
currentSlots, err := testsUtils.GetRepSlotsOnPod(namespace, pod.GetName(), env)
2514+
currentSlots, err := testsUtils.GetReplicationSlotsOnPod(namespace, pod.GetName(), env)
25092515
return currentSlots, err
25102516
}, 300).Should(BeEquivalentTo(expectedSlots),
2511-
fmt.Sprintf(
2512-
"List of expected replication slots on %v pod %v",
2513-
pod.Labels["role"],
2514-
pod.GetName()))
2517+
func() string {
2518+
return testsUtils.PrintReplicationSlots(namespace, clusterName, env)
2519+
})
25152520

25162521
for _, slot := range expectedSlots {
25172522
query := fmt.Sprintf(
25182523
"SELECT EXISTS (SELECT 1 FROM pg_replication_slots "+
25192524
"WHERE slot_name = '%v' AND active = 'f' "+
25202525
"AND temporary = 'f' AND slot_type = 'physical')", slot)
2521-
description := fmt.Sprintf(
2522-
"On %v pod %v, expect replication slot %v to exist and be inactive",
2523-
pod.Labels["role"],
2524-
pod.GetName(),
2525-
slot)
25262526
if specs.IsPodPrimary(pod) {
25272527
query = fmt.Sprintf(
25282528
"SELECT EXISTS (SELECT 1 FROM pg_replication_slots "+
25292529
"WHERE slot_name = '%v' AND active = 't' "+
25302530
"AND temporary = 'f' AND slot_type = 'physical')", slot)
2531-
description = fmt.Sprintf(
2532-
"On %v pod %v, expect replication slot %v to exist and be active",
2533-
pod.Labels["role"],
2534-
pod.GetName(),
2535-
slot)
25362531
}
25372532
Eventually(func() (string, error) {
25382533
stdout, _, err := testsUtils.RunQueryFromPod(&pod, testsUtils.PGLocalSocketDir,
25392534
"app", "postgres", "''", query, env)
25402535
return strings.TrimSpace(stdout), err
2541-
}, 300).Should(BeEquivalentTo("t"), description)
2536+
}, 300).Should(BeEquivalentTo("t"),
2537+
func() string {
2538+
return testsUtils.PrintReplicationSlots(namespace, clusterName, env)
2539+
})
25422540
}
25432541
}
25442542

2545-
// AssertClusterRepSlotsAligned will compare all the replication slot restart_lsn
2543+
// AssertClusterReplicationSlotsAligned will compare all the replication slot restart_lsn
25462544
// in a cluster. The assertion will succeed if they are all equivalent.
2547-
func AssertClusterRepSlotsAligned(
2545+
func AssertClusterReplicationSlotsAligned(
25482546
namespace,
25492547
clusterName string,
25502548
) {
2549+
// Replication slot high availability requires PostgreSQL 11 or above
2550+
if env.PostgresVersion == 10 {
2551+
Skip("Ignoring replication slots verification for postgres 10")
2552+
}
2553+
25512554
podList, err := env.GetClusterPodList(namespace, clusterName)
25522555
Expect(err).ToNot(HaveOccurred())
25532556
Eventually(func() bool {
25542557
var lsnList []string
25552558
for _, pod := range podList.Items {
2556-
out, err := testsUtils.GetRepSlotsLsnOnPod(namespace, clusterName, pod, env)
2559+
out, err := testsUtils.GetReplicationSlotLsnsOnPod(namespace, clusterName, pod, env)
25572560
Expect(err).ToNot(HaveOccurred())
25582561
lsnList = append(lsnList, out...)
25592562
}
2560-
return testsUtils.CompareLsn(lsnList)
2561-
}, 300).Should(BeEquivalentTo(true))
2563+
return testsUtils.AreSameLsn(lsnList)
2564+
}, 300).Should(BeEquivalentTo(true),
2565+
func() string {
2566+
return testsUtils.PrintReplicationSlots(namespace, clusterName, env)
2567+
})
2568+
}
2569+
2570+
// AssertClusterReplicationSlots will verify if the replication slots of each pod
2571+
// of the cluster exist and are aligned.
2572+
func AssertClusterReplicationSlots(namespace, clusterName string) {
2573+
By("verifying all cluster's replication slots exist and are aligned", func() {
2574+
podList, err := env.GetClusterPodList(namespace, clusterName)
2575+
Expect(err).ToNot(HaveOccurred())
2576+
for _, pod := range podList.Items {
2577+
AssertReplicationSlotsOnPod(namespace, clusterName, pod)
2578+
}
2579+
AssertClusterReplicationSlotsAligned(namespace, clusterName)
2580+
})
2581+
}
2582+
2583+
// AssertClusterRollingRestart restarts the given cluster and waits for it to become ready again
2584+
func AssertClusterRollingRestart(namespace, clusterName string) {
2585+
By(fmt.Sprintf("restarting cluster %v", clusterName), func() {
2586+
cluster, err := env.GetCluster(namespace, clusterName)
2587+
Expect(err).ToNot(HaveOccurred())
2588+
clusterRestarted := cluster.DeepCopy()
2589+
if clusterRestarted.Annotations == nil {
2590+
clusterRestarted.Annotations = make(map[string]string)
2591+
}
2592+
clusterRestarted.Annotations[specs.ClusterRestartAnnotationName] = time.Now().Format(time.RFC3339)
2593+
clusterRestarted.ManagedFields = nil
2594+
err = env.Client.Patch(env.Ctx, clusterRestarted, ctrlclient.MergeFrom(cluster))
2595+
Expect(err).ToNot(HaveOccurred())
2596+
})
2597+
2598+
By("waiting for the cluster to end up in upgrading state", func() {
2599+
// waiting for cluster phase to end up in "Upgrading cluster" state after restarting the cluster.
2600+
Eventually(func() (bool, error) {
2601+
cluster, err := env.GetCluster(namespace, clusterName)
2602+
return cluster.Status.Phase == apiv1.PhaseUpgrade, err
2603+
}, 120, 3).Should(BeTrue())
2604+
})
2605+
AssertClusterIsReady(namespace, clusterName, 300, env)
25622606
}

tests/e2e/fastfailover_test.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ import (
2727

2828
var _ = Describe("Fast failover", Serial, Label(tests.LabelPerformance), func() {
2929
const (
30-
sampleFile = fixturesDir + "/fastfailover/cluster-fast-failover.yaml.template"
31-
sampleFileSyncReplicas = fixturesDir + "/fastfailover/cluster-syncreplicas-fast-failover.yaml.template"
32-
webTestFile = fixturesDir + "/fastfailover/webtest.yaml"
33-
webTestSyncReplicas = fixturesDir + "/fastfailover/webtest-syncreplicas.yaml"
34-
webTestJob = fixturesDir + "/fastfailover/apache-benchmark-webtest.yaml"
35-
level = tests.Highest
30+
sampleFileWithoutReplicationSlots = fixturesDir + "/fastfailover/cluster-fast-failover.yaml.template"
31+
sampleFileWithReplicationSlots = fixturesDir + "/fastfailover/cluster-fast-failover-with-repl-slots.yaml.template"
32+
sampleFileSyncReplicas = fixturesDir + "/fastfailover/cluster-syncreplicas-fast-failover.yaml.template"
33+
webTestFile = fixturesDir + "/fastfailover/webtest.yaml"
34+
webTestSyncReplicas = fixturesDir + "/fastfailover/webtest-syncreplicas.yaml"
35+
webTestJob = fixturesDir + "/fastfailover/apache-benchmark-webtest.yaml"
36+
level = tests.Highest
3637
)
3738
var (
3839
namespace string
@@ -82,7 +83,7 @@ var _ = Describe("Fast failover", Serial, Label(tests.LabelPerformance), func()
8283
})
8384

8485
AfterEach(func() {
85-
err := env.DeleteNamespace(namespace)
86+
err := env.DeleteNamespaceAndWait(namespace, 120)
8687
Expect(err).ToNot(HaveOccurred())
8788
})
8889

@@ -95,7 +96,26 @@ var _ = Describe("Fast failover", Serial, Label(tests.LabelPerformance), func()
9596
It("can do a fast failover", func() {
9697
namespace = "primary-failover-time"
9798
clusterName = "cluster-fast-failover"
98-
AssertFastFailOver(namespace, sampleFile, clusterName, webTestFile, webTestJob, maxReattachTime, maxFailoverTime)
99+
AssertFastFailOver(namespace, sampleFileWithoutReplicationSlots, clusterName,
100+
webTestFile, webTestJob, maxReattachTime, maxFailoverTime)
101+
})
102+
})
103+
104+
Context("with async replicas cluster and HA Replication Slots", func() {
105+
// Confirm that a standby closely following the primary doesn't need more
106+
// than 10 seconds to be promoted and be able to start inserting records.
107+
// We test this by setting up an application pointing to the rw service,
108+
// forcing a failover and measuring how much time passes between the
109+
// last row written on timeline 1 and the first one on timeline 2.
110+
It("can do a fast failover", func() {
111+
if env.PostgresVersion == 10 {
112+
Skip("replication slots not available in PostgreSQL 10 or older")
113+
}
114+
namespace = "primary-failover-time"
115+
clusterName = "cluster-fast-failover"
116+
AssertFastFailOver(namespace, sampleFileWithReplicationSlots,
117+
clusterName, webTestFile, webTestJob, maxReattachTime, maxFailoverTime)
118+
AssertClusterReplicationSlots(namespace, clusterName)
99119
})
100120
})
101121

0 commit comments

Comments
 (0)