
Commit 6345347

fix: WAL pvc not deleted while scaling down (cloudnative-pg#1135)
The scale-down process was not deleting the PVC created to store the WAL files, so the claim was left behind and later caused errors when scaling the cluster back up. This patch also deletes the WAL PVC of the instance being removed during scale-down.

Signed-off-by: Danish Khan <danish.khan@enterprisedb.com>
Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com>
Co-authored-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com>
1 parent c0f915d commit 6345347
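In essence, scale-down now treats the WAL claim the same way it already treats the PGDATA claim: delete it, and tolerate it already being gone. A minimal, self-contained sketch of that pattern with the controller-runtime client follows; it is illustrative only, and the namespace, PVC names, and standalone main are assumptions rather than the operator's actual code (in the operator the WAL claim name is derived via specs.GetPVCName with utils.PVCRolePgWal, as the diff below shows).

package main

import (
    "context"
    "fmt"

    corev1 "k8s.io/api/core/v1"
    apierrs "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/client/config"
)

// deletePVC removes a PVC by namespace and name, ignoring NotFound so that
// retries and repeated reconciliations stay idempotent.
func deletePVC(ctx context.Context, c client.Client, namespace, name string) error {
    pvc := corev1.PersistentVolumeClaim{
        ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
    }
    if err := c.Delete(ctx, &pvc); err != nil && !apierrs.IsNotFound(err) {
        return fmt.Errorf("deleting pvc %v: %w", name, err)
    }
    return nil
}

func main() {
    cfg, err := config.GetConfig()
    if err != nil {
        panic(err)
    }
    c, err := client.New(cfg, client.Options{})
    if err != nil {
        panic(err)
    }
    // Hypothetical names, for illustration only: an instance "cluster-example-2"
    // with a dedicated WAL volume typically has a companion "cluster-example-2-wal" claim.
    for _, name := range []string{"cluster-example-2", "cluster-example-2-wal"} {
        if err := deletePVC(context.Background(), c, "default", name); err != nil {
            panic(err)
        }
    }
}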

File tree

5 files changed (+227 -41 lines)

- controllers/cluster_scale.go (+26 -7)
- controllers/cluster_scale_test.go (+115 -34)
- controllers/suite_test.go (+59)
- tests/e2e/asserts_test.go (+19)
- tests/e2e/scaling_test.go (+8)


controllers/cluster_scale.go (+26 -7)

@@ -28,6 +28,8 @@ import (
 
     apiv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
     "github.com/cloudnative-pg/cloudnative-pg/pkg/management/log"
+    "github.com/cloudnative-pg/cloudnative-pg/pkg/specs"
+    "github.com/cloudnative-pg/cloudnative-pg/pkg/utils"
 )
 
 // scaleDownCluster handles the scaling down operations of a PostgreSQL cluster.
@@ -79,11 +81,29 @@ func (r *ClusterReconciler) scaleDownCluster(
         },
     }
 
-    err := r.Delete(ctx, &pvc)
-    if err != nil {
+    contextLogger.Info("Deleting PGDATA PVC", "pvc", pvc.Name)
+    if err := r.Delete(ctx, &pvc); err != nil {
         // Ignore if NotFound, otherwise report the error
         if !apierrs.IsNotFound(err) {
-            return fmt.Errorf("scaling down node (pvc) %v: %v", sacrificialInstance.Name, err)
+            return fmt.Errorf("scaling down node (pgdata pvc) %v: %w", sacrificialInstance.Name, err)
+        }
+    }
+
+    if cluster.ShouldCreateWalArchiveVolume() {
+        // Let's drop the WAL PVC too
+        pvcWalName := specs.GetPVCName(*cluster, sacrificialInstance.Name, utils.PVCRolePgWal)
+        pvcWal := v1.PersistentVolumeClaim{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      pvcWalName,
+                Namespace: sacrificialInstance.Namespace,
+            },
+        }
+        contextLogger.Info("Deleting WAL PVC", "pvc", pvcWal.Name)
+        if err := r.Delete(ctx, &pvcWal); err != nil {
+            // Ignore if NotFound, otherwise report the error
+            if !apierrs.IsNotFound(err) {
+                return fmt.Errorf("scaling down node (wal pvc) %v: %w", sacrificialInstance.Name, err)
+            }
         }
     }
 
@@ -93,17 +113,16 @@ func (r *ClusterReconciler) scaleDownCluster(
             // This job was working against the PVC of this Pod,
             // let's remove it
             foreground := metav1.DeletePropagationForeground
-            err = r.Delete(
+            if err := r.Delete(
                 ctx,
                 &resources.jobs.Items[idx],
                 &client.DeleteOptions{
                     PropagationPolicy: &foreground,
                 },
-            )
-            if err != nil {
+            ); err != nil {
                 // Ignore if NotFound, otherwise report the error
                 if !apierrs.IsNotFound(err) {
-                    return fmt.Errorf("scaling down node (job) %v: %v", sacrificialInstance.Name, err)
+                    return fmt.Errorf("scaling down node (job) %v: %w", sacrificialInstance.Name, err)
                 }
             }
         }
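One detail worth noting in the hunks above is the switch from %v to %w when building these errors: wrapping with %w preserves the underlying API error, so callers can still unwrap it and recognize it as a NotFound (or any other status error) instead of getting an opaque string. A standalone sketch of the difference, not part of the commit, with made-up resource names:

package main

import (
    "errors"
    "fmt"

    apierrs "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/runtime/schema"
)

func main() {
    // Simulate the error a Delete call would return for a missing PVC.
    notFound := apierrs.NewNotFound(
        schema.GroupResource{Group: "", Resource: "persistentvolumeclaims"},
        "cluster-example-2-wal", // hypothetical PVC name
    )

    wrapped := fmt.Errorf("scaling down node (wal pvc) %v: %w", "cluster-example-2", notFound)
    formatted := fmt.Errorf("scaling down node (wal pvc) %v: %v", "cluster-example-2", notFound)

    // %w keeps the original error in the chain...
    fmt.Println(apierrs.IsNotFound(errors.Unwrap(wrapped))) // true
    // ...while %v flattens it into plain text, losing the type information.
    fmt.Println(errors.Unwrap(formatted) == nil) // true: nothing left to unwrap
}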

controllers/cluster_scale_test.go (+115 -34)

@@ -23,44 +23,125 @@ import (
     corev1 "k8s.io/api/core/v1"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
     "k8s.io/apimachinery/pkg/types"
+    ctrl "sigs.k8s.io/controller-runtime/pkg/client"
+
+    "github.com/cloudnative-pg/cloudnative-pg/pkg/specs"
+    "github.com/cloudnative-pg/cloudnative-pg/pkg/utils"
 
     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"
 )
 
-var _ = Describe("cluster_scale unit tests", func() {
-    It("should make sure that scale down works correctly", func() {
-        ctx := context.Background()
-        namespace := newFakeNamespace()
-        cluster := newFakeCNPGCluster(namespace)
-
-        resources := &managedResources{
-            pvcs: corev1.PersistentVolumeClaimList{Items: generateFakePVCWithDefaultClient(cluster)},
-            jobs: batchv1.JobList{Items: generateFakeInitDBJobsWithDefaultClient(cluster)},
-            instances: corev1.PodList{Items: generateFakeClusterPodsWithDefaultClient(cluster, true)},
-        }
-
-        sacrificialInstanceBefore := getSacrificialInstance(resources.instances.Items)
-        err := k8sClient.Get(
-            ctx,
-            types.NamespacedName{Name: sacrificialInstanceBefore.Name, Namespace: cluster.Namespace},
-            &corev1.Pod{},
-        )
-        Expect(err).To(BeNil())
-
-        err = clusterReconciler.scaleDownCluster(
-            ctx,
-            cluster,
-            resources,
-        )
-        Expect(err).To(BeNil())
-
-        sacrificialInstance := getSacrificialInstance(resources.instances.Items)
-        err = k8sClient.Get(
-            ctx,
-            types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
-            &corev1.Pod{},
-        )
-        Expect(apierrors.IsNotFound(err)).To(BeTrue())
+var _ = Describe("scale down", func() {
+    When("there's no separate WAL storage", func() {
+        It("delete the PGDATA PVC", func() {
+            ctx := context.Background()
+            namespace := newFakeNamespace()
+            cluster := newFakeCNPGCluster(namespace)
+
+            resources := &managedResources{
+                pvcs: corev1.PersistentVolumeClaimList{Items: generateFakePVCWithDefaultClient(cluster)},
+                jobs: batchv1.JobList{Items: generateFakeInitDBJobsWithDefaultClient(cluster)},
+                instances: corev1.PodList{Items: generateFakeClusterPodsWithDefaultClient(cluster, true)},
+            }
+
+            sacrificialInstance := getSacrificialInstance(resources.instances.Items)
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.Pod{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeTrue())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeTrue())
+
+            Expect(clusterReconciler.scaleDownCluster(
+                ctx,
+                cluster,
+                resources,
+            )).To(Succeed())
+
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.Pod{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeFalse())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeFalse())
+        })
+    })
+
+    When("WAL storage is separate", func() {
+        It("delete the PGDATA and WAL PVC", func() {
+            ctx := context.Background()
+            namespace := newFakeNamespace()
+            cluster := newFakeCNPGClusterWithPGWal(namespace)
+
+            resources := &managedResources{
+                pvcs: corev1.PersistentVolumeClaimList{Items: generateFakePVCWithDefaultClient(cluster)},
+                jobs: batchv1.JobList{Items: generateFakeInitDBJobsWithDefaultClient(cluster)},
+                instances: corev1.PodList{Items: generateFakeClusterPodsWithDefaultClient(cluster, true)},
+            }
+
+            sacrificialInstance := getSacrificialInstance(resources.instances.Items)
+            pvcWalName := specs.GetPVCName(*cluster, sacrificialInstance.Name, utils.PVCRolePgWal)
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.Pod{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeTrue())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeTrue())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: pvcWalName, Namespace: cluster.Namespace},
+            )).To(BeTrue())
+
+            Expect(clusterReconciler.scaleDownCluster(
+                ctx,
+                cluster,
+                resources,
+            )).To(Succeed())
+
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.Pod{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeFalse())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: sacrificialInstance.Name, Namespace: cluster.Namespace},
+            )).To(BeFalse())
+            Expect(isResourceExisting(
+                ctx,
+                &corev1.PersistentVolumeClaim{},
+                types.NamespacedName{Name: pvcWalName, Namespace: cluster.Namespace},
+            )).To(BeFalse())
+        })
     })
 })
+
+// isResourceExisting check is a certain resource exists in the Kubernetes space and has not been deleted
+func isResourceExisting(ctx context.Context, store ctrl.Object, key ctrl.ObjectKey) (bool, error) {
+    err := k8sClient.Get(ctx, key, store)
+    if err != nil && apierrors.IsNotFound(err) {
+        return false, nil
+    }
+    if err != nil {
+        return false, err
+    }
+    if store.GetDeletionTimestamp() != nil {
+        return false, nil
+    }
+    return true, nil
+}

controllers/suite_test.go (+59)

@@ -201,6 +201,57 @@ func newFakeCNPGCluster(namespace string) *apiv1.Cluster {
     return cluster
 }
 
+func newFakeCNPGClusterWithPGWal(namespace string) *apiv1.Cluster {
+    const instances int = 3
+    name := "cluster-" + rand.String(10)
+    caServer := fmt.Sprintf("%s-ca-server", name)
+    caClient := fmt.Sprintf("%s-ca-client", name)
+
+    cluster := &apiv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: name,
+            Namespace: namespace,
+        },
+        Spec: apiv1.ClusterSpec{
+            Instances: instances,
+            Certificates: &apiv1.CertificatesConfiguration{
+                ServerCASecret: caServer,
+                ClientCASecret: caClient,
+            },
+            StorageConfiguration: apiv1.StorageConfiguration{
+                Size: "1G",
+            },
+            WalStorage: &apiv1.StorageConfiguration{
+                Size: "1G",
+            },
+        },
+        Status: apiv1.ClusterStatus{
+            Instances: instances,
+            SecretsResourceVersion: apiv1.SecretsResourceVersion{},
+            ConfigMapResourceVersion: apiv1.ConfigMapResourceVersion{},
+            Certificates: apiv1.CertificatesStatus{
+                CertificatesConfiguration: apiv1.CertificatesConfiguration{
+                    ServerCASecret: caServer,
+                    ClientCASecret: caClient,
+                },
+            },
+        },
+    }
+
+    cluster.SetDefaults()
+
+    err := k8sClient.Create(context.Background(), cluster)
+    Expect(err).To(BeNil())
+
+    // upstream issue, go client cleans typemeta: https://github.com/kubernetes/client-go/issues/308
+    cluster.TypeMeta = metav1.TypeMeta{
+        Kind: apiv1.ClusterKind,
+        APIVersion: apiv1.GroupVersion.String(),
+    }
+
+    return cluster
+}
+
 func newFakeNamespace() string {
     name := rand.String(10)
 
@@ -293,6 +344,14 @@ func generateFakePVC(c client.Client, cluster *apiv1.Cluster) []corev1.PersistentVolumeClaim {
         err = c.Create(context.Background(), pvc)
         Expect(err).To(BeNil())
         pvcs = append(pvcs, *pvc)
+        if cluster.ShouldCreateWalArchiveVolume() {
+            pvcWal, err := specs.CreatePVC(cluster.Spec.StorageConfiguration, *cluster, idx, utils.PVCRolePgWal)
+            Expect(err).To(BeNil())
+            SetClusterOwnerAnnotationsAndLabels(&pvcWal.ObjectMeta, cluster)
+            err = c.Create(context.Background(), pvcWal)
+            Expect(err).To(BeNil())
+            pvcs = append(pvcs, *pvcWal)
+        }
     }
     return pvcs
 }

tests/e2e/asserts_test.go (+19)

@@ -2572,3 +2572,22 @@ func AssertClusterRollingRestart(namespace, clusterName string) {
     })
     AssertClusterIsReady(namespace, clusterName, 300, env)
 }
+
+// AssertPVCCount matches count and pvc List.
+func AssertPVCCount(namespace, clusterName string, pvcCount int) {
+    By("verify cluster healthy pvc list", func() {
+        Eventually(func(g Gomega) {
+            cluster, _ := env.GetCluster(namespace, clusterName)
+            g.Expect(cluster.Status.PVCCount).To(BeEquivalentTo(pvcCount))
+
+            pvcList := &corev1.PersistentVolumeClaimList{}
+            err := env.Client.List(
+                env.Ctx, pvcList, ctrlclient.MatchingLabels{utils.ClusterLabelName: clusterName},
+                ctrlclient.InNamespace(namespace),
+            )
+            g.Expect(err).To(BeNil())
+
+            g.Expect(cluster.Status.PVCCount).To(BeEquivalentTo(len(pvcList.Items)))
+        }, 60, 4).Should(Succeed())
+    })
+}

tests/e2e/scaling_test.go (+8)

@@ -33,6 +33,7 @@ var _ = Describe("Cluster scale up and down", Serial, Label(tests.LabelReplicati
         sampleFileWithReplicationSlots = fixturesDir + "/base/cluster-storage-class-with-rep-slots.yaml.template"
         clusterName = "postgresql-storage-class"
         level = tests.Lowest
+        expectedPvcCount = 6
     )
     BeforeEach(func() {
         if testLevelEnv.Depth < int(level) {
@@ -74,6 +75,10 @@ var _ = Describe("Cluster scale up and down", Serial, Label(tests.LabelReplicati
             AssertClusterIsReady(namespace, clusterName, timeout, env)
         })
         AssertClusterReplicationSlots(clusterName, namespace)
+
+        By("verify pvc pgWal and pgData are deleted after scale down", func() {
+            AssertPVCCount(namespace, clusterName, expectedPvcCount)
+        })
     })
 })
 
@@ -108,6 +113,9 @@ var _ = Describe("Cluster scale up and down", Serial, Label(tests.LabelReplicati
             timeout := 60
             AssertClusterIsReady(namespace, clusterName, timeout, env)
         })
+        By("verify pvc pgWal and pgData are deleted after scale down", func() {
+            AssertPVCCount(namespace, clusterName, expectedPvcCount)
+        })
     })
 })
 })
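For context on the new expectedPvcCount of 6: with a dedicated WAL volume each instance owns two claims (PGDATA and WAL), so the value presumably corresponds to the three instances the cluster returns to after scaling back down; that instance count is an assumption, since the fixture itself is not shown in this diff. A trivial sketch of the arithmetic:

package main

import "fmt"

func main() {
    const (
        instances       = 3 // assumed instance count after the scale-down (not shown in this diff)
        pvcsPerInstance = 2 // PGDATA + WAL, assuming the fixture enables walStorage
    )
    fmt.Println("expected PVCs:", instances*pvcsPerInstance) // prints: expected PVCs: 6
}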
