@@ -46,6 +46,32 @@ public static List<Requirement> getControllerRequirements(String jobID) {
4646 new AttributeRequirement ("hardware" , "cores" , RequirementOperator .GEQ , "4" )));
4747 return reqs ;
4848 }
49+
50+ private static Map <String ,String > getAppEnvVarsMap (NebulousApp app )
51+ {
52+ Map <String ,String > envirnoment = new HashMap <String , String >();
53+ for (final JsonNode v : app .getOriginalAppMessage ().withArray ("/environmentVariables" )) {
54+ if (v .has ("name" ) && v .has ("value" ) && v .get ("name" ).isTextual ()) {
55+ // TODO: figure out what to do with the `"secret":true` field
56+ envirnoment .put (v .get ("name" ).asText (), v .get ("value" ).asText ());
57+ } else {
58+ log .warn ("Invalid environmentVariables entry: {}" , v );
59+ }
60+ }
61+ return envirnoment ;
62+ }
63+
64+ /**
65+ * Check if the app is a best effort app.
66+ * A best effort app is an app that tolerates partial deployments due to missing node candidates.
67+ * @param app the app
68+ * @return true if the app is a best effort app, false otherwise
69+ */
70+ private static boolean isBestEffort (NebulousApp app )
71+ {
72+ Map <String ,String > envs = getAppEnvVarsMap (app );
73+ return envs .containsKey ("BEST_EFFORT" ) && envs .get ("BEST_EFFORT" ).toLowerCase ().equals ("true" );
74+ }
4975
5076 /**
5177 * Check if an edge node has a job id assigned.
@@ -540,12 +566,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
540566 // track of its suggested node candidates.
541567 String masterNodeName = "m" + clusterName .toLowerCase () + "-master" ;
542568 suggestedNodeCandidates .put (masterNodeName , controllerCandidates );
543- if (!checkComponentNodeCandidates (suggestedNodeCandidates , componentRequirements )) {
544- app .setStateFailed (List .of ());
545- log .error ("Aborting deployment" );
546- return ;
547- }
548-
569+
549570 // ------------------------------------------------------------
550571 // Select node candidates
551572
@@ -587,6 +608,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
587608 // candidate (already includes master node at this point)
588609 // - nodeLabels: a map from node name to its label
589610 Map <String , Set <String >> componentNodeNames = new HashMap <>();
611+ boolean bestEffort = isBestEffort (app );
590612 for (Map .Entry <String , List <Requirement >> e : componentRequirements .entrySet ()) {
591613 String componentName = e .getKey ();
592614 int numberOfNodes = nodeCounts .get (componentName );
@@ -603,9 +625,19 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
603625 .findFirst ()
604626 .orElse (null );
605627 if (candidate == null ) {
606- log .error ("No available node candidate for node {} of component {}, aborting deployment" , nodeNumber , componentName );
607- app .setStateFailed (deployedNodeCandidates .values ());
608- return ;
628+ if (bestEffort )
629+ {
630+ nodeCounts .put (componentName , nodeNumber );
631+ log .error ("No available node candidate for node {} of component {}, continuing deployment regardless" , nodeNumber , componentName );
632+ break ;
633+ }else
634+ {
635+ log .error ("No available node candidate for node {} of component {}, aborting deployment" , nodeNumber , componentName );
636+ app .setStateFailed (deployedNodeCandidates .values ());
637+ return ;
638+ }
639+
640+
609641 }
610642 if (candidate .isEdgeNodeCandidate ()) {
611643 if (!isEdgeNodeBusy (candidate ) && EdgeNodes .acquire (appUUID , candidate )) {
@@ -783,7 +815,7 @@ public static void deployApplication(NebulousApp app, JsonNode kubevela) {
783815 * @param app the NebulOuS app object.
784816 * @param updatedKubevela the KubeVela file to deploy.
785817 */
786- public static void redeployApplication (NebulousApp app , ObjectNode updatedKubevela ) {
818+ public static void redeployApplication (NebulousApp app , ObjectNode updatedKubevela , boolean kubevelaChanged ) {
787819 String appUUID = app .getUUID ();
788820 String clusterName = app .getClusterName ();
789821 ExnConnector conn = app .getExnConnector ();
@@ -855,6 +887,7 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
855887 //Fetch the whole list of dead nodes from SAL
856888 List <String > deadNodeNames = conn .getAppDeadNodes (appUUID ,clusterName );
857889
890+ boolean bestEffort = isBestEffort (app );
858891 for (String componentName : components .keySet ()) {
859892 // The variable `allMachineNames` shall, at the end of each loop
860893 // body, contain the machine names for this component.
@@ -907,18 +940,31 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
907940 .findFirst ()
908941 .orElse (null );
909942 if (candidate == null ) {
910- log .error ("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment." , nodeNumber , componentName );
911-
912- try {
913- log .info ("Proceed to free uncommited edge node candidates" );
914- EdgeNodes .release (appUUID , newNodeCandidatesRegistered );
915- }catch (Exception ex )
916- {
917- log .error ("Failed to free uncommited edge node candidates" ,ex );
918-
919- }
920- app .setStateRunning ();
921- return ;
943+
944+
945+ if (bestEffort )
946+ {
947+ componentReplicaCounts .put (componentName , nodeNumber +oldCount );
948+ log .error ("No available node candidate for node {} of component {}, continuing deployment regardless" , nodeNumber , componentName );
949+ break ;
950+ }else
951+ {
952+ log .error ("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment." , nodeNumber , componentName );
953+
954+ try {
955+ log .info ("Proceed to free uncommited edge node candidates" );
956+ EdgeNodes .release (appUUID , newNodeCandidatesRegistered );
957+ }catch (Exception ex )
958+ {
959+ log .error ("Failed to free uncommited edge node candidates" ,ex );
960+
961+ }
962+ app .setStateRunning ();
963+ return ;
964+ }
965+
966+
967+
922968 }
923969 if (candidate .isEdgeNodeCandidate ()) {
924970 // If we already own the edge node, it's busy but
@@ -983,27 +1029,35 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
9831029 });
9841030 allMachineNames = new HashSet <>();
9851031 log .info ("Node requirements changed, need to redeploy all nodes of component {}" , componentName );
986- int nodeNumber = 1 ;
987- while (nodeNumber <= componentReplicaCounts .get (componentName )) {
1032+ int nodeNumber = 0 ;
1033+ while (nodeNumber < componentReplicaCounts .get (componentName )) {
9881034 String nodeName = createNodeName (clusterName , componentName , app .getDeployGeneration (), nodeNumber );
9891035 NodeCandidate candidate = candidates .stream ()
9901036 .filter (each -> !isEdgeNodeBusy (each )
9911037 && !EdgeNodes .ownedEdgeNodes (appUUID ).contains (each ))
9921038 .findFirst ()
9931039 .orElse (null );
9941040 if (candidate == null ) {
995- log .error ("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment." , nodeNumber , componentName );
996-
997- try {
998- log .info ("Proceed to free uncommited edge node candidates" );
999- EdgeNodes .release (appUUID , newNodeCandidatesRegistered );
1000- }catch (Exception ex )
1001- {
1002- log .error ("Failed to free uncommited edge node candidates" ,ex );
1003-
1004- }
1005- app .setStateRunning ();
1006- return ;
1041+ if (bestEffort )
1042+ {
1043+ componentReplicaCounts .put (componentName , nodeNumber );
1044+ log .error ("No available node candidate for node {} of component {}, continuing deployment regardless" , nodeNumber , componentName );
1045+ break ;
1046+ }else
1047+ {
1048+ log .error ("No available node candidate for node {} of component {} (out of edge nodes?). Aborting redeployment." , nodeNumber , componentName );
1049+
1050+ try {
1051+ log .info ("Proceed to free uncommited edge node candidates" );
1052+ EdgeNodes .release (appUUID , newNodeCandidatesRegistered );
1053+ }catch (Exception ex )
1054+ {
1055+ log .error ("Failed to free uncommited edge node candidates" ,ex );
1056+
1057+ }
1058+ app .setStateRunning ();
1059+ return ;
1060+ }
10071061 }
10081062 if (candidate .isEdgeNodeCandidate ()) {
10091063 // If we already own the edge node, it's busy but we
@@ -1056,14 +1110,21 @@ public static void redeployApplication(NebulousApp app, ObjectNode updatedKubeve
10561110
10571111 log .info ("Labeling nodes: {}" , nodeLabels );
10581112 Main .logFile ("redeploy-labelNodes-" + appUUID + ".json" , nodeLabels .toPrettyString ());
1059- conn .labelNodes (appUUID , clusterName , nodeLabels );
1113+ if (! nodeLabels . isEmpty ()) conn .labelNodes (appUUID , clusterName , nodeLabels );
10601114
10611115 log .info ("Redeploying application: {}" , deploymentKubevela );
1062- long proActiveJobID = conn .deployApplication (appUUID , clusterName , app .getName (), deploymentKubevela );
1063- if (proActiveJobID == 0 ) {
1064- // 0 means conversion from long has failed (because of an
1065- // invalid response), OR a ProActive job id of 0.
1066- log .error ("DeployApplication ProActive job ID = 0, deployApplication has probably failed during redeployment; continuing and hoping for the best." );
1116+ if (kubevelaChanged )
1117+ {
1118+ long proActiveJobID = conn .deployApplication (appUUID , clusterName , app .getName (), deploymentKubevela );
1119+ if (proActiveJobID == 0 ) {
1120+ // 0 means conversion from long has failed (because of an
1121+ // invalid response), OR a ProActive job id of 0.
1122+ log .error ("DeployApplication ProActive job ID = 0, deployApplication has probably failed during redeployment; continuing and hoping for the best." );
1123+ }
1124+
1125+ }else
1126+ {
1127+ log .info ("Kubevela has not changed, skipping redeployment" );
10671128 }
10681129 // TODO: wait until redeployment finished before scaling down the
10691130 // cluster, so that kubernetes can move containers etc.
0 commit comments