@@ -24,12 +24,10 @@ import (
24
24
"context"
25
25
"time"
26
26
27
- "github.com/arangodb/kube-arangodb/pkg/util/globals"
28
-
29
27
"github.com/arangodb/kube-arangodb/pkg/util/errors"
30
28
31
29
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
32
- "github.com/arangodb/kube-arangodb/pkg/util/arangod "
30
+ "github.com/arangodb/kube-arangodb/pkg/deployment/agency "
33
31
)
34
32
35
33
const (
@@ -74,10 +72,8 @@ func (r *Resilience) CheckMemberFailure(ctx context.Context) error {
74
72
if m .IsNotReadySince (time .Now ().Add (- notReadySinceGracePeriod )) {
75
73
// Member has not been ready for longer than the grace period.
76
74
77
- failureAcceptable , reason , err := r .isMemberFailureAcceptable (ctx , group , m )
78
- if err != nil {
79
- log .Err (err ).Warn ("Failed to check is member failure is acceptable" )
80
- } else if failureAcceptable {
75
+ failureAcceptable , reason := r .isMemberFailureAcceptable (group , m )
76
+ if failureAcceptable {
81
77
log .Info ("Member is not ready for long time, marking is failed" )
82
78
m .Phase = api .MemberPhaseFailed
83
79
status .Members .Update (m , group )
@@ -93,10 +89,8 @@ func (r *Resilience) CheckMemberFailure(ctx context.Context) error {
93
89
count := m .RecentTerminationsSince (time .Now ().Add (- recentTerminationsSinceGracePeriod ))
94
90
if count >= recentTerminationThreshold {
95
91
// Member has terminated too often in recent history.
96
- failureAcceptable , reason , err := r .isMemberFailureAcceptable (ctx , group , m )
97
- if err != nil {
98
- log .Err (err ).Warn ("Failed to check is member failure is acceptable" )
99
- } else if failureAcceptable {
92
+ failureAcceptable , reason := r .isMemberFailureAcceptable (group , m )
93
+ if failureAcceptable {
100
94
log .Info ("Member has terminated too often in recent history, marking is failed" )
101
95
m .Phase = api .MemberPhaseFailed
102
96
status .Members .Update (m , group )
@@ -123,42 +117,46 @@ func (r *Resilience) CheckMemberFailure(ctx context.Context) error {
123
117
124
118
// isMemberFailureAcceptable checks if it is currently acceptable to switch the phase of the given member
125
119
// to failed, which means that it will be replaced.
126
- // Return: failureAcceptable, notAcceptableReason, error
127
- func (r * Resilience ) isMemberFailureAcceptable (ctx context. Context , group api.ServerGroup , m api.MemberStatus ) (bool , string , error ) {
120
+ // Return: failureAcceptable, notAcceptableReason
121
+ func (r * Resilience ) isMemberFailureAcceptable (group api.ServerGroup , m api.MemberStatus ) (bool , string ) {
128
122
129
123
switch group {
130
124
case api .ServerGroupAgents :
131
125
agencyHealth , ok := r .context .GetAgencyHealth ()
132
126
if ! ok {
133
- return false , "AgencyHealth is not present" , nil
127
+ return false , "AgencyHealth is not present"
134
128
}
135
129
136
130
if err := agencyHealth .Healthy (); err != nil {
137
- return false , err .Error (), nil
131
+ return false , err .Error ()
138
132
}
139
133
140
- return true , "" , nil
134
+ return true , ""
141
135
case api .ServerGroupDBServers :
142
- ctxChild , cancel := globals .GetGlobalTimeouts ().ArangoD ().WithTimeout (ctx )
143
- defer cancel ()
144
- client , err := r .context .GetDatabaseClient (ctxChild )
145
- if err != nil {
146
- return false , "" , errors .WithStack (err )
136
+ agencyState , ok := r .context .GetAgencyCache ()
137
+ if ! ok {
138
+ return false , "AgencyHealth is not present"
139
+ }
140
+
141
+ if agencyState .Plan .Collections .IsDBServerPresent (agency .Server (m .ID )) {
142
+ return false , "DBServer still in Plan"
147
143
}
148
- if err := arangod .IsDBServerEmpty (ctx , m .ID , client ); err != nil {
149
- return false , err .Error (), nil
144
+
145
+ if agencyState .Current .Collections .IsDBServerPresent (agency .Server (m .ID )) {
146
+ return false , "DBServer still in Current"
150
147
}
151
- return true , "" , nil
148
+
149
+ return true , ""
152
150
case api .ServerGroupCoordinators :
153
151
// Coordinators can be replaced at will
154
- return true , "" , nil
152
+ return true , ""
155
153
case api .ServerGroupSyncMasters , api .ServerGroupSyncWorkers :
156
154
// Sync masters & workers can be replaced at will
157
- return true , "" , nil
155
+ return true , ""
158
156
case api .ServerGroupSingle :
159
- return false , "ServerGroupSingle can not marked as a failed" , nil
157
+ return false , "ServerGroupSingle can not marked as a failed"
160
158
default :
161
159
// TODO
162
- return false , "TODO" , nil
160
+ return false , "TODO"
163
161
}
164
162
}
0 commit comments