Skip to content

Commit a15cb81

Browse files
committed
Merge remote-tracking branch 'apache/4.18' into main
2 parents 78213da + e6f048b commit a15cb81

File tree

7 files changed

+250
-76
lines changed

7 files changed

+250
-76
lines changed

agent/src/main/java/com/cloud/agent/Agent.java

+4
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242

4343
import com.cloud.resource.AgentStatusUpdater;
4444
import com.cloud.resource.ResourceStatusUpdater;
45+
import com.cloud.agent.api.PingAnswer;
4546
import com.cloud.utils.NumbersUtil;
4647
import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
4748
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
@@ -842,6 +843,9 @@ public void processResponse(final Response response, final Link link) {
842843
listener.processControlResponse(response, (AgentControlAnswer)answer);
843844
}
844845
}
846+
} else if (answer instanceof PingAnswer && (((PingAnswer) answer).isSendStartup()) && _reconnectAllowed) {
847+
s_logger.info("Management server requested startup command to reinitialize the agent");
848+
sendStartup(link);
845849
} else {
846850
setLastPingResponseTime();
847851
}

core/src/main/java/com/cloud/agent/api/PingAnswer.java

+12-1
Original file line number | Diff line number | Diff line change
@@ -22,15 +22,26 @@
2222
/**
 * Answer returned for a {@link PingCommand}.
 *
 * Besides the originating command, it carries a {@code sendStartup} flag.
 * In this change set, AgentManagerImpl.processRequest passes {@code true} when the host is
 * found, its status is not Up and the gateway check passed; Agent.processResponse reacts to a
 * {@code true} flag (when reconnect is allowed) by calling sendStartup(link).
 */
public class PingAnswer extends Answer {
2323
private PingCommand _command = null;
2424

25+
// Flag read through isSendStartup(); defaults to false and is set by the two-argument
// constructor or by setSendStartup().
private boolean sendStartup = false;
26+
2527
// No-argument constructor; leaves _command null and sendStartup false.
protected PingAnswer() {
2628
}
2729

28-
public PingAnswer(PingCommand cmd) {
30+
/**
 * @param cmd the ping command being answered; passed to the superclass and kept in _command
 * @param sendStartup initial value of the sendStartup flag
 */
public PingAnswer(PingCommand cmd, boolean sendStartup) {
2931
super(cmd);
3032
_command = cmd;
33+
this.sendStartup = sendStartup;
3134
}
3235

3336
/** @return the ping command stored by the constructor, or null if none was given */
public PingCommand getCommand() {
3437
return _command;
3538
}
39+
40+
/** @return the current value of the sendStartup flag */
public boolean isSendStartup() {
41+
return sendStartup;
42+
}
43+
44+
/** @param sendStartup new value for the sendStartup flag */
public void setSendStartup(boolean sendStartup) {
45+
this.sendStartup = sendStartup;
46+
}
3647
}

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

+96-52
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040

4141
import com.cloud.configuration.Config;
4242
import com.cloud.utils.NumbersUtil;
43+
import com.cloud.utils.db.GlobalLock;
4344
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
4445
import org.apache.cloudstack.ca.CAManager;
4546
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
@@ -799,49 +800,65 @@ public boolean stop() {
799800
return true;
800801
}
801802

803+
/**
 * Determines the status a host should move to when it disconnects with the given event.
 *
 * If the host is already Down, Alert or Removed, its current status is returned unchanged.
 * Otherwise the result is {@code currentStatus.getNextStatus(event)}.
 *
 * @param host the host whose status and UUID are read; must not be null (it is dereferenced
 *             without a check)
 * @param event the disconnect event used to look up the next status
 * @return the status to apply to the host
 * @throws CloudRuntimeException if no transition exists from the current status for the event
 */
protected Status getNextStatusOnDisconnection(Host host, final Status.Event event) {
804+
final Status currentStatus = host.getStatus();
805+
Status nextStatus;
806+
// These three statuses are kept as-is; no transition lookup is attempted for them.
if (currentStatus == Status.Down || currentStatus == Status.Alert || currentStatus == Status.Removed) {
807+
if (s_logger.isDebugEnabled()) {
808+
s_logger.debug(String.format("Host %s is already %s", host.getUuid(), currentStatus));
809+
}
810+
nextStatus = currentStatus;
811+
} else {
812+
try {
813+
nextStatus = currentStatus.getNextStatus(event);
814+
} catch (final NoTransitionException e) {
815+
// The NoTransitionException is converted to an unchecked exception; only the
// formatted message is carried over, not the original exception as a cause.
final String err = String.format("Cannot find next status for %s as current status is %s for agent %s", event, currentStatus, host.getUuid());
816+
s_logger.debug(err);
817+
throw new CloudRuntimeException(err);
818+
}
819+
820+
if (s_logger.isDebugEnabled()) {
821+
s_logger.debug(String.format("The next status of agent %s is %s, current status is %s", host.getUuid(), nextStatus, currentStatus));
822+
}
823+
}
824+
return nextStatus;
825+
}
826+
802827
protected boolean handleDisconnectWithoutInvestigation(final AgentAttache attache, final Status.Event event, final boolean transitState, final boolean removeAgent) {
803828
final long hostId = attache.getId();
804829

805-
s_logger.info("Host " + hostId + " is disconnecting with event " + event);
806-
Status nextStatus = null;
807-
final HostVO host = _hostDao.findById(hostId);
808-
if (host == null) {
809-
s_logger.warn("Can't find host with " + hostId);
810-
nextStatus = Status.Removed;
811-
} else {
812-
final Status currentStatus = host.getStatus();
813-
if (currentStatus == Status.Down || currentStatus == Status.Alert || currentStatus == Status.Removed) {
814-
if (s_logger.isDebugEnabled()) {
815-
s_logger.debug("Host " + hostId + " is already " + currentStatus);
816-
}
817-
nextStatus = currentStatus;
818-
} else {
819-
try {
820-
nextStatus = currentStatus.getNextStatus(event);
821-
} catch (final NoTransitionException e) {
822-
final String err = "Cannot find next status for " + event + " as current status is " + currentStatus + " for agent " + hostId;
823-
s_logger.debug(err);
824-
throw new CloudRuntimeException(err);
830+
boolean result = false;
831+
GlobalLock joinLock = getHostJoinLock(hostId);
832+
if (joinLock.lock(60)) {
833+
try {
834+
s_logger.info(String.format("Host %d is disconnecting with event %s", hostId, event));
835+
Status nextStatus = null;
836+
final HostVO host = _hostDao.findById(hostId);
837+
if (host == null) {
838+
s_logger.warn(String.format("Can't find host with %d", hostId));
839+
nextStatus = Status.Removed;
840+
} else {
841+
nextStatus = getNextStatusOnDisconnection(host, event);
842+
caService.purgeHostCertificate(host);
825843
}
826844

827845
if (s_logger.isDebugEnabled()) {
828-
s_logger.debug("The next status of agent " + hostId + "is " + nextStatus + ", current status is " + currentStatus);
846+
s_logger.debug(String.format("Deregistering link for %d with state %s", hostId, nextStatus));
829847
}
830-
}
831-
caService.purgeHostCertificate(host);
832-
}
833848

834-
if (s_logger.isDebugEnabled()) {
835-
s_logger.debug("Deregistering link for " + hostId + " with state " + nextStatus);
836-
}
849+
removeAgent(attache, nextStatus);
837850

838-
removeAgent(attache, nextStatus);
839-
// update the DB
840-
if (host != null && transitState) {
841-
disconnectAgent(host, event, _nodeId);
851+
if (host != null && transitState) {
852+
// update the state for host in DB as per the event
853+
disconnectAgent(host, event, _nodeId);
854+
}
855+
} finally {
856+
joinLock.unlock();
857+
}
858+
result = true;
842859
}
843-
844-
return true;
860+
joinLock.releaseRef();
861+
return result;
845862
}
846863

847864
protected boolean handleDisconnectWithInvestigation(final AgentAttache attache, Status.Event event) {
@@ -1102,26 +1119,23 @@ protected AgentAttache createAttacheForConnect(final HostVO host, final Link lin
11021119
return attache;
11031120
}
11041121

1105-
private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[] startup, final Request request) {
1106-
AgentAttache attache = null;
1107-
ReadyCommand ready = null;
1108-
try {
1109-
final List<String> agentMSHostList = new ArrayList<>();
1110-
String lbAlgorithm = null;
1111-
if (startup != null && startup.length > 0) {
1112-
final String agentMSHosts = startup[0].getMsHostList();
1113-
if (StringUtils.isNotEmpty(agentMSHosts)) {
1114-
String[] msHosts = agentMSHosts.split("@");
1115-
if (msHosts.length > 1) {
1116-
lbAlgorithm = msHosts[1];
1117-
}
1118-
agentMSHostList.addAll(Arrays.asList(msHosts[0].split(",")));
1122+
private AgentAttache sendReadyAndGetAttache(HostVO host, ReadyCommand ready, Link link, StartupCommand[] startup) throws ConnectionException {
1123+
final List<String> agentMSHostList = new ArrayList<>();
1124+
String lbAlgorithm = null;
1125+
if (startup != null && startup.length > 0) {
1126+
final String agentMSHosts = startup[0].getMsHostList();
1127+
if (StringUtils.isNotEmpty(agentMSHosts)) {
1128+
String[] msHosts = agentMSHosts.split("@");
1129+
if (msHosts.length > 1) {
1130+
lbAlgorithm = msHosts[1];
11191131
}
1132+
agentMSHostList.addAll(Arrays.asList(msHosts[0].split(",")));
11201133
}
1121-
1122-
final HostVO host = _resourceMgr.createHostVOForConnectedAgent(startup);
1123-
if (host != null) {
1124-
ready = new ReadyCommand(host.getDataCenterId(), host.getId(), NumbersUtil.enableHumanReadableSizes);
1134+
}
1135+
AgentAttache attache = null;
1136+
GlobalLock joinLock = getHostJoinLock(host.getId());
1137+
if (joinLock.lock(60)) {
1138+
try {
11251139

11261140
if (!indirectAgentLB.compareManagementServerList(host.getId(), host.getDataCenterId(), agentMSHostList, lbAlgorithm)) {
11271141
final List<String> newMSList = indirectAgentLB.getManagementServerList(host.getId(), host.getDataCenterId(), null);
@@ -1133,6 +1147,24 @@ private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[
11331147

11341148
attache = createAttacheForConnect(host, link);
11351149
attache = notifyMonitorsOfConnection(attache, startup, false);
1150+
} finally {
1151+
joinLock.unlock();
1152+
}
1153+
} else {
1154+
throw new ConnectionException(true, "Unable to acquire lock on host " + host.getUuid());
1155+
}
1156+
joinLock.releaseRef();
1157+
return attache;
1158+
}
1159+
1160+
private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[] startup, final Request request) {
1161+
AgentAttache attache = null;
1162+
ReadyCommand ready = null;
1163+
try {
1164+
final HostVO host = _resourceMgr.createHostVOForConnectedAgent(startup);
1165+
if (host != null) {
1166+
ready = new ReadyCommand(host.getDataCenterId(), host.getId(), NumbersUtil.enableHumanReadableSizes);
1167+
attache = sendReadyAndGetAttache(host, ready, link, startup);
11361168
}
11371169
} catch (final Exception e) {
11381170
s_logger.debug("Failed to handle host connection: ", e);
@@ -1312,6 +1344,8 @@ protected void processRequest(final Link link, final Request request) {
13121344
connectAgent(link, cmds, request);
13131345
}
13141346
return;
1347+
} else if (cmd instanceof StartupCommand) {
1348+
connectAgent(link, cmds, request);
13151349
}
13161350

13171351
final long hostId = attache.getId();
@@ -1366,7 +1400,10 @@ protected void processRequest(final Link link, final Request request) {
13661400
handleCommands(attache, request.getSequence(), new Command[] {cmd});
13671401
if (cmd instanceof PingCommand) {
13681402
final long cmdHostId = ((PingCommand)cmd).getHostId();
1403+
boolean requestStartupCommand = false;
13691404

1405+
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
1406+
boolean gatewayAccessible = true;
13701407
// if the router is sending a ping, verify the
13711408
// gateway was pingable
13721409
if (cmd instanceof PingRoutingCommand) {
@@ -1391,7 +1428,10 @@ protected void processRequest(final Link link, final Request request) {
13911428
s_logger.debug("Not processing " + PingRoutingCommand.class.getSimpleName() + " for agent id=" + cmdHostId + "; can't find the host in the DB");
13921429
}
13931430
}
1394-
answer = new PingAnswer((PingCommand)cmd);
1431+
if (host!= null && host.getStatus() != Status.Up && gatewayAccessible) {
1432+
requestStartupCommand = true;
1433+
}
1434+
answer = new PingAnswer((PingCommand)cmd, requestStartupCommand);
13951435
} else if (cmd instanceof ReadyAnswer) {
13961436
final HostVO host = _hostDao.findById(attache.getId());
13971437
if (host == null) {
@@ -1913,4 +1953,8 @@ public void propagateChangeToAgents(Map<String, String> params) {
19131953
sendCommandToAgents(hostsPerZone, params);
19141954
}
19151955
}
1956+
1957+
/**
 * Obtains the lock used to serialize connect and disconnect handling for one host.
 *
 * The lock name is "Host-Join-" followed by the host id. Callers in this class acquire it with
 * {@code lock(60)}, release it with {@code unlock()} and then call {@code releaseRef()}.
 *
 * @param hostId id of the host the lock is keyed on
 * @return the value returned by {@code GlobalLock.getInternLock} for that name
 */
private GlobalLock getHostJoinLock(Long hostId) {
1958+
return GlobalLock.getInternLock(String.format("%s-%s", "Host-Join", hostId));
1959+
}
19161960
}

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java

+2-2
Original file line number | Diff line number | Diff line change
@@ -1343,8 +1343,8 @@ protected void setupMemoryBalloonStatsPeriod(Connect conn) {
13431343
}
13441344
s_logger.debug(String.format("The memory balloon stats period [%s] has been set successfully for the VM (Libvirt Domain) with ID [%s] and name [%s].",
13451345
currentVmBalloonStatsPeriod, vmId, dm.getName()));
1346-
} catch (final LibvirtException e) {
1347-
s_logger.warn("Failed to set up memory balloon stats period." + e.getMessage());
1346+
} catch (final Exception e) {
1347+
s_logger.warn(String.format("Failed to set up memory balloon stats period for the VM %s with exception %s", parser.getName(), e.getMessage()));
13481348
}
13491349
}
13501350
}

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParser.java

+18-7
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,8 @@ public class LibvirtDomainXMLParser {
5959
private Integer vncPort;
6060
private String desc;
6161

62+
private String name;
63+
6264
public boolean parseDomainXML(String domXML) {
6365
DocumentBuilder builder;
6466
try {
@@ -71,6 +73,7 @@ public boolean parseDomainXML(String domXML) {
7173
Element rootElement = doc.getDocumentElement();
7274

7375
desc = getTagValue("description", rootElement);
76+
name = getTagValue("name", rootElement);
7477

7578
Element devices = (Element)rootElement.getElementsByTagName("devices").item(0);
7679
NodeList disks = devices.getElementsByTagName("disk");
@@ -312,15 +315,19 @@ public boolean parseDomainXML(String domXML) {
312315
String path = getTagValue("backend", rng);
313316
String bytes = getAttrValue("rate", "bytes", rng);
314317
String period = getAttrValue("rate", "period", rng);
315-
316-
if (StringUtils.isEmpty(backendModel)) {
317-
def = new RngDef(path, Integer.parseInt(bytes), Integer.parseInt(period));
318+
if (StringUtils.isAnyEmpty(bytes, period)) {
319+
s_logger.debug(String.format("Bytes and period in the rng section should not be null, please check the VM %s", name));
318320
} else {
319-
def = new RngDef(path, RngBackendModel.valueOf(backendModel.toUpperCase()),
320-
Integer.parseInt(bytes), Integer.parseInt(period));
321+
if (StringUtils.isEmpty(backendModel)) {
322+
def = new RngDef(path, Integer.parseInt(bytes), Integer.parseInt(period));
323+
} else {
324+
def = new RngDef(path, RngBackendModel.valueOf(backendModel.toUpperCase()),
325+
Integer.parseInt(bytes), Integer.parseInt(period));
326+
}
327+
}
328+
if (def != null) {
329+
rngDefs.add(def);
321330
}
322-
323-
rngDefs.add(def);
324331
}
325332

326333
NodeList watchDogs = devices.getElementsByTagName("watchdog");
@@ -427,4 +434,8 @@ public List<WatchDogDef> getWatchDogs() {
427434
public String getDescription() {
428435
return desc;
429436
}
437+
438+
/**
 * @return the value of the "name" tag read from the domain XML root element by
 *         parseDomainXML; null if parseDomainXML has not assigned it
 */
public String getName() {
439+
return name;
440+
}
430441
}

0 commit comments

Comments
 (0)