Skip to content

Commit 02e9171

Browse files
authored
Error status handler improvements (operator-framework#742)
1 parent 70f489f commit 02e9171

File tree

11 files changed

+146
-56
lines changed

11 files changed

+146
-56
lines changed

docs/documentation/features.md

+7-5
Original file line numberDiff line numberDiff line change
@@ -166,19 +166,21 @@ A successful execution resets the retry.
166166

167167
### Setting Error Status After Last Retry Attempt
168168

169-
In order to facilitate error reporting in case a last retry attempt fails, Reconciler can implement the following
169+
In order to facilitate error reporting Reconciler can implement the following
170170
[interface](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java):
171171

172172
```java
173-
public interface ErrorStatusHandler<T extends CustomResource<?, ?>> {
173+
public interface ErrorStatusHandler<T extends HasMetadata> {
174174

175-
T updateErrorStatus(T resource, RuntimeException e);
175+
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
176176

177177
}
178178
```
179179

180-
The `updateErrorStatus` resource is called when it's the last retry attempt according the retry configuration and the
181-
reconciler execution still resulted in a runtime exception.
180+
The `updateErrorStatus` method is called in case an exception is thrown from the reconciler. It is also called when
181+
there is no retry configured, just after the reconciler execution.
182+
In the first call the `RetryInfo.getAttemptCount()` is always zero, since it is not a result of a retry
183+
(regardless if retry is configured or not).
182184

183185
The result of the method call is used to make a status update on the custom resource. This is always a sub-resource
184186
update request, so no update on custom resource itself (like spec of metadata) happens. Note that this update request

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package io.javaoperatorsdk.operator.api.reconciler;
22

3+
import java.util.Optional;
4+
35
import io.fabric8.kubernetes.api.model.HasMetadata;
46

57
public interface ErrorStatusHandler<T extends HasMetadata> {
@@ -19,9 +21,10 @@ public interface ErrorStatusHandler<T extends HasMetadata> {
1921
* should not be updates on custom resource after it is marked for deletion.
2022
*
2123
* @param resource to update the status on
24+
* @param retryInfo
2225
* @param e exception thrown from the reconciler
2326
* @return the updated resource
2427
*/
25-
T updateErrorStatus(T resource, RuntimeException e);
28+
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
2629

2730
}

operator-framework-core/src/main/java/io/javaoperatorsdk/operator/processing/event/ReconciliationDispatcher.java

+20-20
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,7 @@
1111
import io.fabric8.kubernetes.client.dsl.Resource;
1212
import io.javaoperatorsdk.operator.api.ObservedGenerationAware;
1313
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
14-
import io.javaoperatorsdk.operator.api.reconciler.BaseControl;
15-
import io.javaoperatorsdk.operator.api.reconciler.Context;
16-
import io.javaoperatorsdk.operator.api.reconciler.DefaultContext;
17-
import io.javaoperatorsdk.operator.api.reconciler.DeleteControl;
18-
import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusHandler;
19-
import io.javaoperatorsdk.operator.api.reconciler.Reconciler;
20-
import io.javaoperatorsdk.operator.api.reconciler.UpdateControl;
14+
import io.javaoperatorsdk.operator.api.reconciler.*;
2115
import io.javaoperatorsdk.operator.processing.Controller;
2216

2317
import static io.javaoperatorsdk.operator.processing.KubernetesResourceUtils.getName;
@@ -114,7 +108,7 @@ private PostExecutionControl<R> handleReconcile(
114108
cloneResourceForErrorStatusHandlerIfNeeded(originalResource, context);
115109
return reconcileExecution(executionScope, resourceForExecution, originalResource, context);
116110
} catch (RuntimeException e) {
117-
handleLastAttemptErrorStatusHandler(originalResource, context, e);
111+
handleErrorStatusHandler(originalResource, context, e);
118112
throw e;
119113
}
120114
}
@@ -128,7 +122,7 @@ private PostExecutionControl<R> handleReconcile(
128122
* place for status update.
129123
*/
130124
private R cloneResourceForErrorStatusHandlerIfNeeded(R resource, Context context) {
131-
if (isLastAttemptOfRetryAndErrorStatusHandlerPresent(context) ||
125+
if (isErrorStatusHandlerPresent() ||
132126
shouldUpdateObservedGenerationAutomatically(resource)) {
133127
return configuration().getConfigurationService().getResourceCloner().clone(resource);
134128
} else {
@@ -164,26 +158,32 @@ && shouldUpdateObservedGenerationAutomatically(resourceForExecution)) {
164158
return createPostExecutionControl(updatedCustomResource, updateControl);
165159
}
166160

167-
private void handleLastAttemptErrorStatusHandler(R resource, Context context,
161+
private void handleErrorStatusHandler(R resource, Context context,
168162
RuntimeException e) {
169-
if (isLastAttemptOfRetryAndErrorStatusHandlerPresent(context)) {
163+
if (isErrorStatusHandlerPresent()) {
170164
try {
165+
var retryInfo = context.getRetryInfo().orElse(new RetryInfo() {
166+
@Override
167+
public int getAttemptCount() {
168+
return 0;
169+
}
170+
171+
@Override
172+
public boolean isLastAttempt() {
173+
return controller.getConfiguration().getRetryConfiguration() == null;
174+
}
175+
});
171176
var updatedResource = ((ErrorStatusHandler<R>) controller.getReconciler())
172-
.updateErrorStatus(resource, e);
173-
customResourceFacade.updateStatus(updatedResource);
177+
.updateErrorStatus(resource, retryInfo, e);
178+
updatedResource.ifPresent(customResourceFacade::updateStatus);
174179
} catch (RuntimeException ex) {
175180
log.error("Error during error status handling.", ex);
176181
}
177182
}
178183
}
179184

180-
private boolean isLastAttemptOfRetryAndErrorStatusHandlerPresent(Context context) {
181-
if (context.getRetryInfo().isPresent()) {
182-
return context.getRetryInfo().get().isLastAttempt()
183-
&& controller.getReconciler() instanceof ErrorStatusHandler;
184-
} else {
185-
return false;
186-
}
185+
private boolean isErrorStatusHandlerPresent() {
186+
return controller.getReconciler() instanceof ErrorStatusHandler;
187187
}
188188

189189
private R updateStatusGenerationAware(R resource) {

operator-framework-core/src/test/java/io/javaoperatorsdk/operator/processing/event/ReconciliationDispatcherTest.java

+29-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.javaoperatorsdk.operator.processing.event;
22

33
import java.util.ArrayList;
4+
import java.util.Optional;
45

56
import org.junit.jupiter.api.BeforeEach;
67
import org.junit.jupiter.api.Test;
@@ -11,6 +12,7 @@
1112
import io.fabric8.kubernetes.api.model.ObjectMeta;
1213
import io.fabric8.kubernetes.client.CustomResource;
1314
import io.javaoperatorsdk.operator.TestUtils;
15+
import io.javaoperatorsdk.operator.api.config.Cloner;
1416
import io.javaoperatorsdk.operator.api.config.ConfigurationService;
1517
import io.javaoperatorsdk.operator.api.config.ControllerConfiguration;
1618
import io.javaoperatorsdk.operator.api.monitoring.Metrics;
@@ -56,9 +58,12 @@ private <R extends HasMetadata> ReconciliationDispatcher<R> init(R customResourc
5658
when(configuration.getName()).thenReturn("EventDispatcherTestController");
5759
when(configService.getMetrics()).thenReturn(Metrics.NOOP);
5860
when(configuration.getConfigurationService()).thenReturn(configService);
59-
when(configService.getResourceCloner()).thenReturn(ConfigurationService.DEFAULT_CLONER);
60-
when(reconciler.reconcile(eq(customResource), any()))
61-
.thenReturn(UpdateControl.updateResource(customResource));
61+
when(configService.getResourceCloner()).thenReturn(new Cloner() {
62+
@Override
63+
public <R extends HasMetadata> R clone(R object) {
64+
return object;
65+
}
66+
});
6267
when(reconciler.cleanup(eq(customResource), any()))
6368
.thenReturn(DeleteControl.defaultDelete());
6469
when(customResourceFacade.replaceWithLock(any())).thenReturn(null);
@@ -351,9 +356,9 @@ void callErrorStatusHandlerIfImplemented() {
351356

352357
when(reconciler.reconcile(any(), any()))
353358
.thenThrow(new IllegalStateException("Error Status Test"));
354-
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any())).then(a -> {
359+
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any(), any())).then(a -> {
355360
testCustomResource.getStatus().setConfigMapStatus(ERROR_MESSAGE);
356-
return testCustomResource;
361+
return Optional.of(testCustomResource);
357362
});
358363

359364
reconciliationDispatcher.handleExecution(
@@ -373,7 +378,25 @@ public boolean isLastAttempt() {
373378

374379
verify(customResourceFacade, times(1)).updateStatus(testCustomResource);
375380
verify(((ErrorStatusHandler) reconciler), times(1)).updateErrorStatus(eq(testCustomResource),
376-
any());
381+
any(), any());
382+
}
383+
384+
@Test
385+
void callErrorStatusHandlerEvenOnFirstError() {
386+
testCustomResource.addFinalizer(DEFAULT_FINALIZER);
387+
388+
when(reconciler.reconcile(any(), any()))
389+
.thenThrow(new IllegalStateException("Error Status Test"));
390+
when(((ErrorStatusHandler) reconciler).updateErrorStatus(any(), any(), any())).then(a -> {
391+
testCustomResource.getStatus().setConfigMapStatus(ERROR_MESSAGE);
392+
return Optional.of(testCustomResource);
393+
});
394+
reconciliationDispatcher.handleExecution(
395+
new ExecutionScope(
396+
testCustomResource, null));
397+
verify(customResourceFacade, times(1)).updateStatus(testCustomResource);
398+
verify(((ErrorStatusHandler) reconciler), times(1)).updateErrorStatus(eq(testCustomResource),
399+
any(), any());
377400
}
378401

379402
private ObservedGenCustomResource createObservedGenCustomResource() {

operator-framework/src/test/java/io/javaoperatorsdk/operator/ErrorStatusHandlerIT.java

+9-5
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717

1818
public class ErrorStatusHandlerIT {
1919

20+
public static final int MAX_RETRY_ATTEMPTS = 3;
2021
ErrorStatusHandlerTestReconciler reconciler = new ErrorStatusHandlerTestReconciler();
2122

2223
@RegisterExtension
2324
OperatorExtension operator =
2425
OperatorExtension.builder()
2526
.withConfigurationService(DefaultConfigurationService.instance())
26-
.withReconciler(reconciler, new GenericRetry().setMaxAttempts(3).withLinearRetry())
27+
.withReconciler(reconciler,
28+
new GenericRetry().setMaxAttempts(MAX_RETRY_ATTEMPTS).withLinearRetry())
2729
.build();
2830

2931
@Test
@@ -32,16 +34,18 @@ public void testErrorMessageSetEventually() {
3234
operator.create(ErrorStatusHandlerTestCustomResource.class, createCustomResource());
3335

3436
await()
35-
.atMost(15, TimeUnit.SECONDS)
36-
.pollInterval(1L, TimeUnit.SECONDS)
37+
.atMost(10, TimeUnit.SECONDS)
38+
.pollInterval(250, TimeUnit.MICROSECONDS)
3739
.untilAsserted(
3840
() -> {
3941
ErrorStatusHandlerTestCustomResource res =
4042
operator.get(ErrorStatusHandlerTestCustomResource.class,
4143
resource.getMetadata().getName());
4244
assertThat(res.getStatus()).isNotNull();
43-
assertThat(res.getStatus().getMessage())
44-
.isEqualTo(ErrorStatusHandlerTestReconciler.ERROR_STATUS_MESSAGE);
45+
for (int i = 0; i < MAX_RETRY_ATTEMPTS + 1; i++) {
46+
assertThat(res.getStatus().getMessages())
47+
.contains(ErrorStatusHandlerTestReconciler.ERROR_STATUS_MESSAGE + i);
48+
}
4549
});
4650
}
4751

operator-framework/src/test/java/io/javaoperatorsdk/operator/RetryIT.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,18 @@
2020

2121
public class RetryIT {
2222
public static final int RETRY_INTERVAL = 150;
23+
public static final int MAX_RETRY_ATTEMPTS = 5;
24+
25+
public static final int NUMBER_FAILED_EXECUTIONS = 3;
2326

2427
@RegisterExtension
2528
OperatorExtension operator =
2629
OperatorExtension.builder()
2730
.withConfigurationService(DefaultConfigurationService.instance())
2831
.withReconciler(
29-
new RetryTestCustomReconciler(),
32+
new RetryTestCustomReconciler(NUMBER_FAILED_EXECUTIONS),
3033
new GenericRetry().setInitialInterval(RETRY_INTERVAL).withLinearRetry()
31-
.setMaxAttempts(5))
34+
.setMaxAttempts(MAX_RETRY_ATTEMPTS))
3235
.build();
3336

3437

@@ -40,7 +43,7 @@ public void retryFailedExecution() {
4043

4144
await("cr status updated")
4245
.pollDelay(
43-
RETRY_INTERVAL * (RetryTestCustomReconciler.NUMBER_FAILED_EXECUTIONS + 2),
46+
RETRY_INTERVAL * (NUMBER_FAILED_EXECUTIONS + 2),
4447
TimeUnit.MILLISECONDS)
4548
.pollInterval(
4649
RETRY_INTERVAL,
@@ -49,7 +52,7 @@ public void retryFailedExecution() {
4952
.untilAsserted(() -> {
5053
assertThat(
5154
TestUtils.getNumberOfExecutions(operator))
52-
.isEqualTo(RetryTestCustomReconciler.NUMBER_FAILED_EXECUTIONS + 1);
55+
.isEqualTo(NUMBER_FAILED_EXECUTIONS + 1);
5356

5457
RetryTestCustomResource finalResource =
5558
operator.get(RetryTestCustomResource.class,
@@ -59,7 +62,7 @@ public void retryFailedExecution() {
5962
});
6063
}
6164

62-
public RetryTestCustomResource createTestCustomResource(String id) {
65+
public static RetryTestCustomResource createTestCustomResource(String id) {
6366
RetryTestCustomResource resource = new RetryTestCustomResource();
6467
resource.setMetadata(
6568
new ObjectMetaBuilder()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package io.javaoperatorsdk.operator;
2+
3+
import org.junit.jupiter.api.Test;
4+
import org.junit.jupiter.api.extension.RegisterExtension;
5+
6+
import io.javaoperatorsdk.operator.config.runtime.DefaultConfigurationService;
7+
import io.javaoperatorsdk.operator.junit.OperatorExtension;
8+
import io.javaoperatorsdk.operator.processing.retry.GenericRetry;
9+
import io.javaoperatorsdk.operator.sample.retry.RetryTestCustomReconciler;
10+
import io.javaoperatorsdk.operator.sample.retry.RetryTestCustomResource;
11+
12+
import static io.javaoperatorsdk.operator.RetryIT.createTestCustomResource;
13+
import static org.assertj.core.api.Assertions.assertThat;
14+
15+
public class RetryMaxAttemptIT {
16+
17+
public static final int MAX_RETRY_ATTEMPTS = 3;
18+
public static final int RETRY_INTERVAL = 100;
19+
public static final int ALL_EXECUTION_TO_FAIL = 99;
20+
21+
RetryTestCustomReconciler reconciler = new RetryTestCustomReconciler(ALL_EXECUTION_TO_FAIL);
22+
23+
@RegisterExtension
24+
OperatorExtension operator =
25+
OperatorExtension.builder()
26+
.withConfigurationService(DefaultConfigurationService.instance())
27+
.withReconciler(reconciler,
28+
new GenericRetry().setInitialInterval(RETRY_INTERVAL).withLinearRetry()
29+
.setMaxAttempts(MAX_RETRY_ATTEMPTS))
30+
.build();
31+
32+
33+
@Test
34+
public void retryFailedExecution() throws InterruptedException {
35+
RetryTestCustomResource resource = createTestCustomResource("max-retry");
36+
37+
operator.create(RetryTestCustomResource.class, resource);
38+
39+
Thread.sleep((MAX_RETRY_ATTEMPTS + 2) * RETRY_INTERVAL);
40+
assertThat(reconciler.getNumberOfExecutions()).isEqualTo(4);
41+
}
42+
}
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
package io.javaoperatorsdk.operator.sample.errorstatushandler;
22

3+
import java.util.ArrayList;
4+
import java.util.List;
5+
36
public class ErrorStatusHandlerTestCustomResourceStatus {
47

5-
private String message;
8+
private List<String> messages;
69

7-
public String getMessage() {
8-
return message;
10+
public List<String> getMessages() {
11+
if (messages == null) {
12+
messages = new ArrayList<>();
13+
}
14+
return messages;
915
}
1016

11-
public ErrorStatusHandlerTestCustomResourceStatus setMessage(String message) {
12-
this.message = message;
17+
public ErrorStatusHandlerTestCustomResourceStatus setMessages(List<String> messages) {
18+
this.messages = messages;
1319
return this;
1420
}
1521
}

operator-framework/src/test/java/io/javaoperatorsdk/operator/sample/errorstatushandler/ErrorStatusHandlerTestReconciler.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.javaoperatorsdk.operator.sample.errorstatushandler;
22

3+
import java.util.Optional;
34
import java.util.concurrent.atomic.AtomicInteger;
45

56
import org.slf4j.Logger;
@@ -45,11 +46,11 @@ public int getNumberOfExecutions() {
4546
}
4647

4748
@Override
48-
public ErrorStatusHandlerTestCustomResource updateErrorStatus(
49-
ErrorStatusHandlerTestCustomResource resource, RuntimeException e) {
49+
public Optional<ErrorStatusHandlerTestCustomResource> updateErrorStatus(
50+
ErrorStatusHandlerTestCustomResource resource, RetryInfo retryInfo, RuntimeException e) {
5051
log.info("Setting status.");
5152
ensureStatusExists(resource);
52-
resource.getStatus().setMessage(ERROR_STATUS_MESSAGE);
53-
return resource;
53+
resource.getStatus().getMessages().add(ERROR_STATUS_MESSAGE + retryInfo.getAttemptCount());
54+
return Optional.of(resource);
5455
}
5556
}

0 commit comments

Comments
 (0)