From 4cfa7a407b4167912bee3d1a3f926ebd8a015fca Mon Sep 17 00:00:00 2001 From: Daniel Augusto Veronezi Salvador Date: Thu, 18 Feb 2021 11:19:51 -0300 Subject: [PATCH 1/2] Improve logs on KVMHostActivityChecker --- .../kvm/ha/KVMHostActivityChecker.java | 86 ++++++++++++------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java index 060b484fec7d..5323c2397198 100644 --- a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java +++ b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java @@ -69,12 +69,13 @@ public class KVMHostActivityChecker extends AdapterBase implements ActivityCheck public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException { try { return isVMActivtyOnHost(r, suspectTime); - } - catch (StorageUnavailableException e){ - throw new HACheckerException("Storage is unavailable to do the check, mostly host is not reachable ", e); - } - catch (Exception e){ - throw new HACheckerException("Operation timed out, mostly host is not reachable ", e); + } catch (HACheckerException e) { + //Re-throwing the exception to avoid poluting the 'HACheckerException' already thrown + throw e; + } catch (Exception e){ + String message = String.format("Operation timed out, probably the host [%s] is not reachable.", r.getId()); + LOG.warn(message, e); + throw new HACheckerException(message, e); } } @@ -85,24 +86,28 @@ public boolean isHealthy(Host r) { private boolean isAgentActive(Host agent) { if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) { - throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType()); + throw new IllegalStateException(String.format("Calling KVM investigator for non KVM Host of type [%s].", agent.getHypervisorType())); } Status hostStatus = Status.Unknown; Status neighbourStatus = Status.Unknown; final CheckOnHostCommand cmd = new CheckOnHostCommand(agent); try { + LOG.debug(String.format("Checking host [%s] status...", agent.getId())); Answer answer = agentMgr.easySend(agent.getId(), cmd); if (answer != null) { hostStatus = answer.getResult() ? Status.Down : Status.Up; + LOG.debug(String.format("Host [%s] has the status [%s].", agent.getId(), hostStatus)); + if ( hostStatus == Status.Up ){ return true; } } else { + LOG.debug(String.format("Setting host [%s] to \"Disconnected\" status.", agent.getId())); hostStatus = Status.Disconnected; } } catch (Exception e) { - LOG.warn("Failed to send command to host: " + agent.getId()); + LOG.warn(String.format("Failed to send command CheckOnHostCommand to host [%s].", agent.getId()), e); } List neighbors = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); @@ -110,24 +115,24 @@ private boolean isAgentActive(Host agent) { if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) { continue; } - if (LOG.isTraceEnabled()){ - LOG.trace("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId()); - } + try { + LOG.debug(String.format("Investigating host [%s] via neighbouring host [%s].", agent.getId(), neighbor.getId())); + Answer answer = agentMgr.easySend(neighbor.getId(), cmd); if (answer != null) { neighbourStatus = answer.getResult() ? Status.Down : Status.Up; - if (LOG.isTraceEnabled()){ - LOG.trace("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId()); - } + + LOG.debug(String.format("Neighbouring host [%s] returned status [%s] for the investigated host [%s].", neighbor.getId(), neighbourStatus, agent.getId())); + if (neighbourStatus == Status.Up) { break; } + } else { + LOG.debug(String.format("Neighbouring host [%s] is Disconnected.", neighbor.getId())); } } catch (Exception e) { - if (LOG.isTraceEnabled()) { - LOG.trace("Failed to send command to host: " + neighbor.getId()); - } + LOG.warn(String.format("Failed to send command CheckOnHostCommand to host [%s].", neighbor.getId()), e); } } if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { @@ -137,32 +142,48 @@ private boolean isAgentActive(Host agent) { hostStatus = Status.Down; } - if (LOG.isTraceEnabled()){ - LOG.trace("Resource state = " + hostStatus.name()); - } + LOG.debug(String.format("Host [%s] has the status [%s].", agent.getId(), hostStatus)); + return hostStatus == Status.Up; } - private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws StorageUnavailableException { + private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws HACheckerException { if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) { - throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType()); + throw new IllegalStateException(String.format("Calling KVM investigator for non KVM Host of type [%s].", agent.getHypervisorType())); } boolean activityStatus = true; HashMap> poolVolMap = getVolumeUuidOnHost(agent); for (StoragePool pool : poolVolMap.keySet()) { - //for each storage pool find activity - List volume_list = poolVolMap.get(pool); - final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime); - //send the command to appropriate storage pool + activityStatus = verifyActivityOfStorageOnHost(poolVolMap, pool, agent, suspectTime, activityStatus); + if (!activityStatus) { + LOG.warn(String.format("It seems that the storage pool [%s] does not have activity on host [%s].", pool.getId(), agent.getId())); + break; + } + } + + return activityStatus; + } + + protected boolean verifyActivityOfStorageOnHost(HashMap> poolVolMap, StoragePool pool, Host agent, DateTime suspectTime, boolean activityStatus) throws HACheckerException, IllegalStateException { + List volume_list = poolVolMap.get(pool); + final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime); + + LOG.debug(String.format("Checking VM activity for host [%s] on storage pool [%s].", agent.getId(), pool.getId())); + try { Answer answer = storageManager.sendToPool(pool, getNeighbors(agent), cmd); + if (answer != null) { - activityStatus = ! answer.getResult(); + activityStatus = !answer.getResult(); + LOG.debug(String.format("Resource [%s] %s activity on storage pool [%s]", agent.getId(), activityStatus ? "has" : "does not have", pool.getId())); } else { - throw new IllegalStateException("Did not get a valid response for VM activity check for host " + agent.getId()); + String message = String.format("Did not get a valid response for VM activity check for host [%s] on storage pool [%s].", agent.getId(), pool.getId()); + LOG.debug(message); + throw new IllegalStateException(message); } - } - if (LOG.isDebugEnabled()){ - LOG.debug("Resource active = " + activityStatus); + } catch (StorageUnavailableException e){ + String message = String.format("Storage [%s] is unavailable to do the check, probably the host [%s] is not reachable.", pool.getId(), agent.getId()); + LOG.warn(message, e); + throw new HACheckerException(message, e); } return activityStatus; } @@ -171,12 +192,14 @@ private HashMap> getVolumeUuidOnHost(Host agent) { List vm_list = vmInstanceDao.listByHostId(agent.getId()); List volume_list = new ArrayList(); for (VirtualMachine vm : vm_list) { + LOG.debug(String.format("Retrieving volumes of VM [%s]...", vm.getId())); List vm_volume_list = volumeDao.findByInstance(vm.getId()); volume_list.addAll(vm_volume_list); } HashMap> poolVolMap = new HashMap>(); for (Volume vol : volume_list) { + LOG.debug(String.format("Retrieving storage pool [%s] of volume [%s]...", vol.getPoolId(), vol.getId())); StoragePool sp = storagePool.findById(vol.getPoolId()); if (!poolVolMap.containsKey(sp)) { List list = new ArrayList(); @@ -193,6 +216,7 @@ private HashMap> getVolumeUuidOnHost(Host agent) { public long[] getNeighbors(Host agent) { List neighbors = new ArrayList(); List cluster_hosts = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); + LOG.debug(String.format("Retrieving all \"Up\" hosts from cluster [%s]...", agent.getClusterId())); for (HostVO host : cluster_hosts) { if (host.getId() == agent.getId() || (host.getHypervisorType() != Hypervisor.HypervisorType.KVM && host.getHypervisorType() != Hypervisor.HypervisorType.LXC)) { continue; From 971b04754fcf175f7df73dbce3aae418d4d91737 Mon Sep 17 00:00:00 2001 From: Daniel Augusto Veronezi Salvador Date: Thu, 18 Feb 2021 15:53:53 -0300 Subject: [PATCH 2/2] Add more host information in log --- .../src/main/java/com/cloud/host/HostVO.java | 2 +- .../kvm/ha/KVMHostActivityChecker.java | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/engine/schema/src/main/java/com/cloud/host/HostVO.java b/engine/schema/src/main/java/com/cloud/host/HostVO.java index f23435945d50..18dcac9a4a24 100644 --- a/engine/schema/src/main/java/com/cloud/host/HostVO.java +++ b/engine/schema/src/main/java/com/cloud/host/HostVO.java @@ -677,7 +677,7 @@ public boolean equals(Object obj) { @Override public String toString() { - return new StringBuilder("Host[").append("-").append(id).append("-").append(type).append("]").toString(); + return String.format("Host [{id: \"%s\", name: \"%s\", uuid: \"%s\", type=\"%s\"}]", id, name, uuid, type); } public void setHypervisorType(HypervisorType hypervisorType) { diff --git a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java index 5323c2397198..e5752cb97da2 100644 --- a/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java +++ b/plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java @@ -73,7 +73,7 @@ public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException //Re-throwing the exception to avoid poluting the 'HACheckerException' already thrown throw e; } catch (Exception e){ - String message = String.format("Operation timed out, probably the host [%s] is not reachable.", r.getId()); + String message = String.format("Operation timed out, probably the %s is not reachable.", r.toString()); LOG.warn(message, e); throw new HACheckerException(message, e); } @@ -92,22 +92,22 @@ private boolean isAgentActive(Host agent) { Status neighbourStatus = Status.Unknown; final CheckOnHostCommand cmd = new CheckOnHostCommand(agent); try { - LOG.debug(String.format("Checking host [%s] status...", agent.getId())); + LOG.debug(String.format("Checking %s status...", agent.toString())); Answer answer = agentMgr.easySend(agent.getId(), cmd); if (answer != null) { hostStatus = answer.getResult() ? Status.Down : Status.Up; - LOG.debug(String.format("Host [%s] has the status [%s].", agent.getId(), hostStatus)); + LOG.debug(String.format("%s has the status [%s].", agent.toString(), hostStatus)); if ( hostStatus == Status.Up ){ return true; } } else { - LOG.debug(String.format("Setting host [%s] to \"Disconnected\" status.", agent.getId())); + LOG.debug(String.format("Setting %s to \"Disconnected\" status.", agent.toString())); hostStatus = Status.Disconnected; } } catch (Exception e) { - LOG.warn(String.format("Failed to send command CheckOnHostCommand to host [%s].", agent.getId()), e); + LOG.warn(String.format("Failed to send command CheckOnHostCommand to %s.", agent.toString()), e); } List neighbors = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); @@ -117,22 +117,22 @@ private boolean isAgentActive(Host agent) { } try { - LOG.debug(String.format("Investigating host [%s] via neighbouring host [%s].", agent.getId(), neighbor.getId())); + LOG.debug(String.format("Investigating %s via neighbouring %s.", agent.toString(), neighbor.toString())); Answer answer = agentMgr.easySend(neighbor.getId(), cmd); if (answer != null) { neighbourStatus = answer.getResult() ? Status.Down : Status.Up; - LOG.debug(String.format("Neighbouring host [%s] returned status [%s] for the investigated host [%s].", neighbor.getId(), neighbourStatus, agent.getId())); + LOG.debug(String.format("Neighbouring %s returned status [%s] for the investigated %s.", neighbor.toString(), neighbourStatus, agent.toString())); if (neighbourStatus == Status.Up) { break; } } else { - LOG.debug(String.format("Neighbouring host [%s] is Disconnected.", neighbor.getId())); + LOG.debug(String.format("Neighbouring %s is Disconnected.", neighbor.toString())); } } catch (Exception e) { - LOG.warn(String.format("Failed to send command CheckOnHostCommand to host [%s].", neighbor.getId()), e); + LOG.warn(String.format("Failed to send command CheckOnHostCommand to %s.", neighbor.toString()), e); } } if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { @@ -142,7 +142,7 @@ private boolean isAgentActive(Host agent) { hostStatus = Status.Down; } - LOG.debug(String.format("Host [%s] has the status [%s].", agent.getId(), hostStatus)); + LOG.debug(String.format("%s has the status [%s].", agent.toString(), hostStatus)); return hostStatus == Status.Up; } @@ -156,7 +156,7 @@ private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws HAChe for (StoragePool pool : poolVolMap.keySet()) { activityStatus = verifyActivityOfStorageOnHost(poolVolMap, pool, agent, suspectTime, activityStatus); if (!activityStatus) { - LOG.warn(String.format("It seems that the storage pool [%s] does not have activity on host [%s].", pool.getId(), agent.getId())); + LOG.warn(String.format("It seems that the storage pool [%s] does not have activity on %s.", pool.getId(), agent.toString())); break; } } @@ -168,20 +168,20 @@ protected boolean verifyActivityOfStorageOnHost(HashMap volume_list = poolVolMap.get(pool); final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime); - LOG.debug(String.format("Checking VM activity for host [%s] on storage pool [%s].", agent.getId(), pool.getId())); + LOG.debug(String.format("Checking VM activity for %s on storage pool [%s].", agent.toString(), pool.getId())); try { Answer answer = storageManager.sendToPool(pool, getNeighbors(agent), cmd); if (answer != null) { activityStatus = !answer.getResult(); - LOG.debug(String.format("Resource [%s] %s activity on storage pool [%s]", agent.getId(), activityStatus ? "has" : "does not have", pool.getId())); + LOG.debug(String.format("%s %s activity on storage pool [%s]", agent.toString(), activityStatus ? "has" : "does not have", pool.getId())); } else { - String message = String.format("Did not get a valid response for VM activity check for host [%s] on storage pool [%s].", agent.getId(), pool.getId()); + String message = String.format("Did not get a valid response for VM activity check for %s on storage pool [%s].", agent.toString(), pool.getId()); LOG.debug(message); throw new IllegalStateException(message); } } catch (StorageUnavailableException e){ - String message = String.format("Storage [%s] is unavailable to do the check, probably the host [%s] is not reachable.", pool.getId(), agent.getId()); + String message = String.format("Storage [%s] is unavailable to do the check, probably the %s is not reachable.", pool.getId(), agent.toString()); LOG.warn(message, e); throw new HACheckerException(message, e); }