40
40
41
41
import com .cloud .configuration .Config ;
42
42
import com .cloud .utils .NumbersUtil ;
43
+ import com .cloud .utils .db .GlobalLock ;
43
44
import org .apache .cloudstack .agent .lb .IndirectAgentLB ;
44
45
import org .apache .cloudstack .ca .CAManager ;
45
46
import org .apache .cloudstack .engine .orchestration .service .NetworkOrchestrationService ;
@@ -799,49 +800,65 @@ public boolean stop() {
799
800
return true ;
800
801
}
801
802
803
+ protected Status getNextStatusOnDisconnection (Host host , final Status .Event event ) {
804
+ final Status currentStatus = host .getStatus ();
805
+ Status nextStatus ;
806
+ if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
807
+ if (s_logger .isDebugEnabled ()) {
808
+ s_logger .debug (String .format ("Host %s is already %s" , host .getUuid (), currentStatus ));
809
+ }
810
+ nextStatus = currentStatus ;
811
+ } else {
812
+ try {
813
+ nextStatus = currentStatus .getNextStatus (event );
814
+ } catch (final NoTransitionException e ) {
815
+ final String err = String .format ("Cannot find next status for %s as current status is %s for agent %s" , event , currentStatus , host .getUuid ());
816
+ s_logger .debug (err );
817
+ throw new CloudRuntimeException (err );
818
+ }
819
+
820
+ if (s_logger .isDebugEnabled ()) {
821
+ s_logger .debug (String .format ("The next status of agent %s is %s, current status is %s" , host .getUuid (), nextStatus , currentStatus ));
822
+ }
823
+ }
824
+ return nextStatus ;
825
+ }
826
+
802
827
protected boolean handleDisconnectWithoutInvestigation (final AgentAttache attache , final Status .Event event , final boolean transitState , final boolean removeAgent ) {
803
828
final long hostId = attache .getId ();
804
829
805
- s_logger .info ("Host " + hostId + " is disconnecting with event " + event );
806
- Status nextStatus = null ;
807
- final HostVO host = _hostDao .findById (hostId );
808
- if (host == null ) {
809
- s_logger .warn ("Can't find host with " + hostId );
810
- nextStatus = Status .Removed ;
811
- } else {
812
- final Status currentStatus = host .getStatus ();
813
- if (currentStatus == Status .Down || currentStatus == Status .Alert || currentStatus == Status .Removed ) {
814
- if (s_logger .isDebugEnabled ()) {
815
- s_logger .debug ("Host " + hostId + " is already " + currentStatus );
816
- }
817
- nextStatus = currentStatus ;
818
- } else {
819
- try {
820
- nextStatus = currentStatus .getNextStatus (event );
821
- } catch (final NoTransitionException e ) {
822
- final String err = "Cannot find next status for " + event + " as current status is " + currentStatus + " for agent " + hostId ;
823
- s_logger .debug (err );
824
- throw new CloudRuntimeException (err );
830
+ boolean result = false ;
831
+ GlobalLock joinLock = getHostJoinLock (hostId );
832
+ if (joinLock .lock (60 )) {
833
+ try {
834
+ s_logger .info (String .format ("Host %d is disconnecting with event %s" , hostId , event ));
835
+ Status nextStatus = null ;
836
+ final HostVO host = _hostDao .findById (hostId );
837
+ if (host == null ) {
838
+ s_logger .warn (String .format ("Can't find host with %d" , hostId ));
839
+ nextStatus = Status .Removed ;
840
+ } else {
841
+ nextStatus = getNextStatusOnDisconnection (host , event );
842
+ caService .purgeHostCertificate (host );
825
843
}
826
844
827
845
if (s_logger .isDebugEnabled ()) {
828
- s_logger .debug ("The next status of agent " + hostId + "is " + nextStatus + ", current status is " + currentStatus );
846
+ s_logger .debug (String . format ( "Deregistering link for %d with state %s" , hostId , nextStatus ) );
829
847
}
830
- }
831
- caService .purgeHostCertificate (host );
832
- }
833
848
834
- if (s_logger .isDebugEnabled ()) {
835
- s_logger .debug ("Deregistering link for " + hostId + " with state " + nextStatus );
836
- }
849
+ removeAgent (attache , nextStatus );
837
850
838
- removeAgent (attache , nextStatus );
839
- // update the DB
840
- if (host != null && transitState ) {
841
- disconnectAgent (host , event , _nodeId );
851
+ if (host != null && transitState ) {
852
+ // update the state for host in DB as per the event
853
+ disconnectAgent (host , event , _nodeId );
854
+ }
855
+ } finally {
856
+ joinLock .unlock ();
857
+ }
858
+ result = true ;
842
859
}
843
-
844
- return true ;
860
+ joinLock . releaseRef ();
861
+ return result ;
845
862
}
846
863
847
864
protected boolean handleDisconnectWithInvestigation (final AgentAttache attache , Status .Event event ) {
@@ -1102,26 +1119,23 @@ protected AgentAttache createAttacheForConnect(final HostVO host, final Link lin
1102
1119
return attache ;
1103
1120
}
1104
1121
1105
- private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1106
- AgentAttache attache = null ;
1107
- ReadyCommand ready = null ;
1108
- try {
1109
- final List <String > agentMSHostList = new ArrayList <>();
1110
- String lbAlgorithm = null ;
1111
- if (startup != null && startup .length > 0 ) {
1112
- final String agentMSHosts = startup [0 ].getMsHostList ();
1113
- if (StringUtils .isNotEmpty (agentMSHosts )) {
1114
- String [] msHosts = agentMSHosts .split ("@" );
1115
- if (msHosts .length > 1 ) {
1116
- lbAlgorithm = msHosts [1 ];
1117
- }
1118
- agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
1122
+ private AgentAttache sendReadyAndGetAttache (HostVO host , ReadyCommand ready , Link link , StartupCommand [] startup ) throws ConnectionException {
1123
+ final List <String > agentMSHostList = new ArrayList <>();
1124
+ String lbAlgorithm = null ;
1125
+ if (startup != null && startup .length > 0 ) {
1126
+ final String agentMSHosts = startup [0 ].getMsHostList ();
1127
+ if (StringUtils .isNotEmpty (agentMSHosts )) {
1128
+ String [] msHosts = agentMSHosts .split ("@" );
1129
+ if (msHosts .length > 1 ) {
1130
+ lbAlgorithm = msHosts [1 ];
1119
1131
}
1132
+ agentMSHostList .addAll (Arrays .asList (msHosts [0 ].split ("," )));
1120
1133
}
1121
-
1122
- final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1123
- if (host != null ) {
1124
- ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1134
+ }
1135
+ AgentAttache attache = null ;
1136
+ GlobalLock joinLock = getHostJoinLock (host .getId ());
1137
+ if (joinLock .lock (60 )) {
1138
+ try {
1125
1139
1126
1140
if (!indirectAgentLB .compareManagementServerList (host .getId (), host .getDataCenterId (), agentMSHostList , lbAlgorithm )) {
1127
1141
final List <String > newMSList = indirectAgentLB .getManagementServerList (host .getId (), host .getDataCenterId (), null );
@@ -1133,6 +1147,24 @@ private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[
1133
1147
1134
1148
attache = createAttacheForConnect (host , link );
1135
1149
attache = notifyMonitorsOfConnection (attache , startup , false );
1150
+ } finally {
1151
+ joinLock .unlock ();
1152
+ }
1153
+ } else {
1154
+ throw new ConnectionException (true , "Unable to acquire lock on host " + host .getUuid ());
1155
+ }
1156
+ joinLock .releaseRef ();
1157
+ return attache ;
1158
+ }
1159
+
1160
+ private AgentAttache handleConnectedAgent (final Link link , final StartupCommand [] startup , final Request request ) {
1161
+ AgentAttache attache = null ;
1162
+ ReadyCommand ready = null ;
1163
+ try {
1164
+ final HostVO host = _resourceMgr .createHostVOForConnectedAgent (startup );
1165
+ if (host != null ) {
1166
+ ready = new ReadyCommand (host .getDataCenterId (), host .getId (), NumbersUtil .enableHumanReadableSizes );
1167
+ attache = sendReadyAndGetAttache (host , ready , link , startup );
1136
1168
}
1137
1169
} catch (final Exception e ) {
1138
1170
s_logger .debug ("Failed to handle host connection: " , e );
@@ -1312,6 +1344,8 @@ protected void processRequest(final Link link, final Request request) {
1312
1344
connectAgent (link , cmds , request );
1313
1345
}
1314
1346
return ;
1347
+ } else if (cmd instanceof StartupCommand ) {
1348
+ connectAgent (link , cmds , request );
1315
1349
}
1316
1350
1317
1351
final long hostId = attache .getId ();
@@ -1366,7 +1400,10 @@ protected void processRequest(final Link link, final Request request) {
1366
1400
handleCommands (attache , request .getSequence (), new Command [] {cmd });
1367
1401
if (cmd instanceof PingCommand ) {
1368
1402
final long cmdHostId = ((PingCommand )cmd ).getHostId ();
1403
+ boolean requestStartupCommand = false ;
1369
1404
1405
+ final HostVO host = _hostDao .findById (Long .valueOf (cmdHostId ));
1406
+ boolean gatewayAccessible = true ;
1370
1407
// if the router is sending a ping, verify the
1371
1408
// gateway was pingable
1372
1409
if (cmd instanceof PingRoutingCommand ) {
@@ -1391,7 +1428,10 @@ protected void processRequest(final Link link, final Request request) {
1391
1428
s_logger .debug ("Not processing " + PingRoutingCommand .class .getSimpleName () + " for agent id=" + cmdHostId + "; can't find the host in the DB" );
1392
1429
}
1393
1430
}
1394
- answer = new PingAnswer ((PingCommand )cmd );
1431
+ if (host != null && host .getStatus () != Status .Up && gatewayAccessible ) {
1432
+ requestStartupCommand = true ;
1433
+ }
1434
+ answer = new PingAnswer ((PingCommand )cmd , requestStartupCommand );
1395
1435
} else if (cmd instanceof ReadyAnswer ) {
1396
1436
final HostVO host = _hostDao .findById (attache .getId ());
1397
1437
if (host == null ) {
@@ -1913,4 +1953,8 @@ public void propagateChangeToAgents(Map<String, String> params) {
1913
1953
sendCommandToAgents (hostsPerZone , params );
1914
1954
}
1915
1955
}
1956
+
1957
+ private GlobalLock getHostJoinLock (Long hostId ) {
1958
+ return GlobalLock .getInternLock (String .format ("%s-%s" , "Host-Join" , hostId ));
1959
+ }
1916
1960
}
0 commit comments