Skip to content

Commit afde634

Browse files
Merge branch 'main' into r1
2 parents 3ef21f4 + 09c0a58 commit afde634

2 files changed

Lines changed: 119 additions & 45 deletions

File tree

optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -952,9 +952,21 @@ public void redeployWithSolution(ObjectNode solution) {
952952
log.info("variables: {}",variables);
953953
ObjectNode kubevela = rewriteKubevelaWithSolution(variables);
954954
log.info("kubevela with variables substitution: {}",kubevela);
955-
currentKubevela = kubevela.deepCopy();
955+
956+
/**
957+
* Check if the kubevela has changed. If it has, redeploy the vela file
958+
*/
959+
boolean kubevelaChanged = false;
960+
try {
961+
// Compare the new kubevela with the one kept in `currentKubevela`, both
// serialized the same way.  Comparing the serialized String against the
// ObjectNode itself can never be equal, which would force
// `kubevelaChanged` to true on every call.
kubevelaChanged = !yamlMapper.writeValueAsString(kubevela).equals(yamlMapper.writeValueAsString(currentKubevela));
962+
}catch(Exception ex)
963+
{
964+
log.error("Failed to check if kubevela changed. Assuming it changed.",ex);
965+
kubevelaChanged = true;
966+
}
967+
956968
if (deployGeneration > 0) {
957-
NebulousAppDeployer.redeployApplication(this, kubevela);
969+
NebulousAppDeployer.redeployApplication(this, kubevela,kubevelaChanged);
958970
} else {
959971
// Since the solver is started as part of the initial deployment
960972
// this branch is effectively dead code -- but in case the overall
@@ -963,6 +975,7 @@ public void redeployWithSolution(ObjectNode solution) {
963975
log.warn("App received a solver solution before being deployed, this is unexpected. Boldly moving forward with initial deployment.");
964976
NebulousAppDeployer.deployApplication(this, kubevela);
965977
}
978+
currentKubevela = kubevela.deepCopy();
966979
}
967980

968981
/**
@@ -1004,7 +1017,7 @@ public void appHealthCheck() {
10041017
return;
10051018
}
10061019

1007-
NebulousAppDeployer.redeployApplication(this, currentKubevela.deepCopy());
1020+
NebulousAppDeployer.redeployApplication(this, currentKubevela.deepCopy(),false);
10081021
}catch(Exception ex)
10091022
{
10101023
log.error("Failed appHealthCheck",ex);

optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java

Lines changed: 103 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,32 @@ public static List<Requirement> getControllerRequirements(String jobID) {
4646
new AttributeRequirement("hardware", "cores", RequirementOperator.GEQ, "4")));
4747
return reqs;
4848
}
49+
50+
private static Map<String,String> getAppEnvVarsMap(NebulousApp app)
51+
{
52+
Map<String,String> envirnoment = new HashMap<String, String>();
53+
for (final JsonNode v : app.getOriginalAppMessage().withArray("/environmentVariables")) {
54+
if (v.has("name") && v.has("value") && v.get("name").isTextual()) {
55+
// TODO: figure out what to do with the `"secret":true` field
56+
envirnoment.put(v.get("name").asText(), v.get("value").asText());
57+
} else {
58+
log.warn("Invalid environmentVariables entry: {}", v);
59+
}
60+
}
61+
return envirnoment;
62+
}
63+
64+
/**
65+
* Check if the app is a best effort app.
66+
* A best effort app is an app that tolerates partial deployments due to missing node candidates.
67+
* @param app the app
68+
* @return true if the app is a best effort app, false otherwise
69+
*/
70+
private static boolean isBestEffort(NebulousApp app)
71+
{
72+
Map<String,String> envs = getAppEnvVarsMap(app);
73+
return envs.containsKey("BEST_EFFORT") && envs.get("BEST_EFFORT").toLowerCase().equals("true");
74+
}
4975

5076
/**
5177
* Check if an edge node has a job id assigned.
@@ -540,12 +566,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
540566
// track of its suggested node candidates.
541567
String masterNodeName = "m" + clusterName.toLowerCase() + "-master";
542568
suggestedNodeCandidates.put(masterNodeName, controllerCandidates);
543-
if (!checkComponentNodeCandidates(suggestedNodeCandidates, componentRequirements)) {
544-
app.setStateFailed(List.of());
545-
log.error("Aborting deployment");
546-
return;
547-
}
548-
569+
549570
// ------------------------------------------------------------
550571
// Select node candidates
551572

@@ -587,6 +608,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
587608
// candidate (already includes master node at this point)
588609
// - nodeLabels: a map from node name to its label
589610
Map<String, Set<String>> componentNodeNames = new HashMap<>();
611+
boolean bestEffort = isBestEffort(app);
590612
for (Map.Entry<String, List<Requirement>> e : componentRequirements.entrySet()) {
591613
String componentName = e.getKey();
592614
int numberOfNodes = nodeCounts.get(componentName);
@@ -603,9 +625,19 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
603625
.findFirst()
604626
.orElse(null);
605627
if (candidate == null) {
606-
log.error("No available node candidate for node {} of component {}, aborting deployment", nodeNumber, componentName);
607-
app.setStateFailed(deployedNodeCandidates.values());
608-
return;
628+
if(bestEffort)
629+
{
630+
nodeCounts.put(componentName, nodeNumber);
631+
log.error("No available node candidate for node {} of component {}, continuing deployment regardless", nodeNumber, componentName);
632+
break;
633+
}else
634+
{
635+
log.error("No available node candidate for node {} of component {}, aborting deployment", nodeNumber, componentName);
636+
app.setStateFailed(deployedNodeCandidates.values());
637+
return;
638+
}
639+
640+
609641
}
610642
if (candidate.isEdgeNodeCandidate()) {
611643
if (!isEdgeNodeBusy(candidate) && EdgeNodes.acquire(appUUID, candidate)) {
@@ -783,7 +815,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
783815
* @param app the NebulOuS app object.
784816
* @param updatedKubevela the KubeVela file to deploy.
785817
*/
786-
public static void redeployApplication(NebulousApp app, ObjectNode updatedKubevela) {
818+
public static void redeployApplication(NebulousApp app, ObjectNode updatedKubevela, boolean kubevelaChanged) {
787819
String appUUID = app.getUUID();
788820
String clusterName = app.getClusterName();
789821
ExnConnector conn = app.getExnConnector();
@@ -855,6 +887,7 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
855887
//Fetch the whole list of dead nodes from SAL
856888
List<String> deadNodeNames = conn.getAppDeadNodes(appUUID,clusterName);
857889

890+
boolean bestEffort = isBestEffort(app);
858891
for (String componentName : components.keySet()) {
859892
// The variable `allMachineNames` shall, at the end of each loop
860893
// body, contain the machine names for this component.
@@ -907,18 +940,31 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
907940
.findFirst()
908941
.orElse(null);
909942
if (candidate == null) {
910-
log.error("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment.", nodeNumber, componentName);
911-
912-
try {
913-
log.info("Proceed to free uncommited edge node candidates");
914-
EdgeNodes.release(appUUID, newNodeCandidatesRegistered);
915-
}catch(Exception ex)
916-
{
917-
log.error("Failed to free uncommited edge node candidates",ex);
918-
919-
}
920-
app.setStateRunning();
921-
return;
943+
944+
945+
if(bestEffort)
946+
{
947+
componentReplicaCounts.put(componentName, nodeNumber+oldCount);
948+
log.error("No available node candidate for node {} of component {}, continuing deployment regardless", nodeNumber, componentName);
949+
break;
950+
}else
951+
{
952+
log.error("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment.", nodeNumber, componentName);
953+
954+
try {
955+
log.info("Proceed to free uncommited edge node candidates");
956+
EdgeNodes.release(appUUID, newNodeCandidatesRegistered);
957+
}catch(Exception ex)
958+
{
959+
log.error("Failed to free uncommited edge node candidates",ex);
960+
961+
}
962+
app.setStateRunning();
963+
return;
964+
}
965+
966+
967+
922968
}
923969
if (candidate.isEdgeNodeCandidate()) {
924970
// If we already own the edge node, it's busy but
@@ -983,27 +1029,35 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
9831029
});
9841030
allMachineNames = new HashSet<>();
9851031
log.info("Node requirements changed, need to redeploy all nodes of component {}", componentName);
986-
int nodeNumber = 1;
987-
while (nodeNumber <= componentReplicaCounts.get(componentName)) {
1032+
int nodeNumber = 0;
1033+
while (nodeNumber < componentReplicaCounts.get(componentName)) {
9881034
String nodeName = createNodeName(clusterName, componentName, app.getDeployGeneration(), nodeNumber);
9891035
NodeCandidate candidate = candidates.stream()
9901036
.filter(each -> !isEdgeNodeBusy(each)
9911037
&& !EdgeNodes.ownedEdgeNodes(appUUID).contains(each))
9921038
.findFirst()
9931039
.orElse(null);
9941040
if (candidate == null) {
995-
log.error("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment.", nodeNumber, componentName);
996-
997-
try {
998-
log.info("Proceed to free uncommited edge node candidates");
999-
EdgeNodes.release(appUUID, newNodeCandidatesRegistered);
1000-
}catch(Exception ex)
1001-
{
1002-
log.error("Failed to free uncommited edge node candidates",ex);
1003-
1004-
}
1005-
app.setStateRunning();
1006-
return;
1041+
if(bestEffort)
1042+
{
1043+
componentReplicaCounts.put(componentName, nodeNumber);
1044+
log.error("No available node candidate for node {} of component {}, continuing deployment regardless", nodeNumber, componentName);
1045+
break;
1046+
}else
1047+
{
1048+
log.error("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment.", nodeNumber, componentName);
1049+
1050+
try {
1051+
log.info("Proceed to free uncommited edge node candidates");
1052+
EdgeNodes.release(appUUID, newNodeCandidatesRegistered);
1053+
}catch(Exception ex)
1054+
{
1055+
log.error("Failed to free uncommited edge node candidates",ex);
1056+
1057+
}
1058+
app.setStateRunning();
1059+
return;
1060+
}
10071061
}
10081062
if (candidate.isEdgeNodeCandidate()) {
10091063
// If we already own the edge node, it's busy but we
@@ -1056,14 +1110,21 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
10561110

10571111
log.info("Labeling nodes: {}", nodeLabels);
10581112
Main.logFile("redeploy-labelNodes-" + appUUID + ".json", nodeLabels.toPrettyString());
1059-
conn.labelNodes(appUUID, clusterName, nodeLabels);
1113+
if(!nodeLabels.isEmpty())conn.labelNodes(appUUID, clusterName, nodeLabels);
10601114

10611115
log.info("Redeploying application: {}", deploymentKubevela);
1062-
long proActiveJobID = conn.deployApplication(appUUID, clusterName, app.getName(), deploymentKubevela);
1063-
if (proActiveJobID == 0) {
1064-
// 0 means conversion from long has failed (because of an
1065-
// invalid response), OR a ProActive job id of 0.
1066-
log.error("DeployApplication ProActive job ID = 0, deployApplication has probably failed during redeployment; continuing and hoping for the best.");
1116+
if(kubevelaChanged)
1117+
{
1118+
long proActiveJobID = conn.deployApplication(appUUID, clusterName, app.getName(), deploymentKubevela);
1119+
if (proActiveJobID == 0) {
1120+
// 0 means conversion from long has failed (because of an
1121+
// invalid response), OR a ProActive job id of 0.
1122+
log.error("DeployApplication ProActive job ID = 0, deployApplication has probably failed during redeployment; continuing and hoping for the best.");
1123+
}
1124+
1125+
}else
1126+
{
1127+
log.info("Kubevela has not changed, skipping redeployment");
10671128
}
10681129
// TODO: wait until redeployment finished before scaling down the
10691130
// cluster, so that kubernetes can move containers etc.

0 commit comments

Comments
 (0)