66import sys
77import time
88from datetime import timedelta
9+ import re
10+ from urllib .parse import urlparse
11+
912
1013import azure .batch .models as batchmodels
1114import numpy as np
2326 VirtualMachine ,
2427 VirtualMachineImage ,
2528)
29+
30+ from azure .storage .blob import BlobServiceClient , generate_blob_sas , BlobSasPermissions
2631from azure .mgmt .monitor import MonitorManagementClient
2732from azure .mgmt .netapp import NetAppManagementClient
2833from azure .mgmt .netapp .models import NetAppAccount
2934from azure .mgmt .network import NetworkManagementClient
3035from azure .mgmt .resource import ResourceManagementClient , SubscriptionClient
36+ from azure .mgmt .storage import StorageManagementClient
37+ from azure .batch .models import ResourceFile
3138
3239from hpcadvisor import dataset_handler , logger , taskset_handler , utils
3340from hpcadvisor .azure_identity_credential_adapter import AzureIdentityCredentialAdapter
@@ -123,7 +130,7 @@ def wait_pool_ready(poolid):
123130 def verify_pool ():
124131 pool_config = batch_client .pool .get (poolid )
125132 if pool_config .resize_errors and len (pool_config .resize_errors ) > 0 :
126- log .error (f"Pool { pool_id } resize failed." )
133+ log .error (f"Pool { poolid } resize failed." )
127134 msg = "\n " .join (
128135 [f" { e .code } : { e .message } " for e in pool_config .resize_errors ]
129136 )
@@ -564,7 +571,7 @@ def create_pool(sku, number_of_nodes):
564571 anfmountdir = env ["ANFMOUNTDIR" ]
565572 anfvolume = env ["ANFVOLUMENAME" ]
566573 script = f"""
567- sleep 60
574+ sleep 120
568575 sudo mkdir { anfmountdir } ; mount { anf_ip } :/{ anfvolume } { anfmountdir }
569576 sudo chown _azbatch:_azbatchgrp { anfmountdir }
570577 sudo df -Tha
@@ -656,6 +663,61 @@ def create_pool(sku, number_of_nodes):
656663
657664 return poolname
658665
666+ def is_url (input_string ):
667+
668+ is_url_bool = False
669+ parsed = urlparse (input_string )
670+
671+ if parsed .scheme in ("http" , "https" ) and parsed .netloc :
672+ is_url_bool = True
673+
674+ domain_pattern = re .compile (r"^[\w.-]+\.[a-z]{2,}(?:/|$)" )
675+ if domain_pattern .match (input_string ):
676+ is_url_bool = True
677+
678+ if os .path .exists (input_string ):
679+ is_url_bool = False
680+
681+ log .debug (f"Input string '{ input_string } ' is URL: { is_url_bool } " )
682+ return is_url_bool
683+
684+ def get_blob_storage_key (subscription_id , resource_group , storage_account_name ):
685+ credential = DefaultAzureCredential ()
686+ storage_client = StorageManagementClient (credential , subscription_id )
687+ keys = storage_client .storage_accounts .list_keys (resource_group , storage_account_name )
688+ return keys .keys [0 ].value # Use the first key
689+
690+ def upload_file_to_blob (local_file_path , blob_storage_account , blob_storage_key , blob_container ):
691+
692+ log .info (f"Uploading file { local_file_path } to blob storage" )
693+ blob_storage_key = get_blob_storage_key (
694+ env ["SUBSCRIPTION" ],
695+ env ["RG" ],
696+ blob_storage_account
697+ )
698+
699+ log .debug (f"blob_storage_account={ blob_storage_account } , blob_container={ blob_container } , blob_storage_key={ blob_storage_key } " )
700+
701+ script_name = os .path .basename (local_file_path )
702+ blob_service_client = BlobServiceClient (
703+ f"https://{ blob_storage_account } .blob.core.windows.net" , credential = blob_storage_key
704+ )
705+ blob_client = blob_service_client .get_blob_client (container = blob_container , blob = script_name )
706+ with open (local_file_path , "rb" ) as data :
707+ blob_client .upload_blob (data , overwrite = True )
708+
709+ sas_token = generate_blob_sas (
710+ account_name = blob_storage_account ,
711+ container_name = blob_container ,
712+ blob_name = script_name ,
713+ account_key = blob_storage_key ,
714+ permission = BlobSasPermissions (read = True ),
715+ expiry = datetime .datetime .now (datetime .timezone .utc ) + timedelta (hours = 2 )
716+ )
717+
718+ log .info (f"File { script_name } uploaded to blob storage: { blob_client .url } " )
719+ log .debug (f"SAS token: { sas_token } " )
720+ return f"{ blob_client .url } ?{ sas_token } " , script_name
659721
660722def create_setup_task (jobid , appsetupurl ):
661723 log .info (f"Creating setup task for jobid={ jobid } " )
@@ -664,8 +726,7 @@ def create_setup_task(jobid, appsetupurl):
664726 log .critical ("batch_client is None" )
665727 return
666728
667- app_setup_url = appsetupurl
668- log .debug (f"application setup url: { app_setup_url } " )
729+ log .debug (f"application setup url: { appsetupurl } " )
669730
670731 random_code = utils .get_random_code ()
671732 task_id = f"task-app-setup-{ random_code } "
@@ -674,18 +735,37 @@ def create_setup_task(jobid, appsetupurl):
674735 log .critical (f"jobid is None and cannot create setup task { task_id } " )
675736 return
676737
677- script_name = os .path .basename (app_setup_url )
738+ blob_storage_account = env ["BLOBSTORAGEACCOUNT" ]
739+ blob_storage_key = env ["BLOBSTORAGEKEY" ]
740+ blob_container = env ["BLOBCONTAINER" ]
741+
742+
743+ appsetup_is_url = is_url (appsetupurl )
744+ if not appsetup_is_url :
745+ log .debug (f"appsetupurl is not a URL: { appsetupurl } " )
746+ app_setup_url , script_name = upload_file_to_blob (appsetupurl , blob_storage_account , blob_storage_key , blob_container )
747+ resource_files = [ResourceFile (http_url = app_setup_url , file_path = script_name )]
748+ else :
749+ script_name = os .path .basename (appsetupurl )
750+ resource_files = []
751+
678752 log .debug (f"script for application: { script_name } " )
679753
680754 anfmountdir = env ["ANFMOUNTDIR" ]
681-
755+
682756 if anfenabled :
683- task_commands = [
684- f"/bin/bash -c 'set ; cd { anfmountdir } ; curl -sLO { app_setup_url } ; source { script_name } ; { HPCADVISOR_FUNCTION_SETUP } '"
685- ]
757+ if not appsetup_is_url :
758+ task_commands = [
759+ f"/bin/bash -c 'set ; cp { script_name } { anfmountdir } ; cd { anfmountdir } ; sudo chown _azbatch:_azbatchgrp { script_name } ; source { script_name } ; { HPCADVISOR_FUNCTION_SETUP } '"
760+ ]
761+ else :
762+ task_commands = [
763+ f"/bin/bash -c 'set ; cd { anfmountdir } ; curl -sLO { appsetupurl } ; sudo chown _azbatch:_azbatchgrp { script_name } ; source { script_name } ; { HPCADVISOR_FUNCTION_SETUP } '"
764+ ]
686765 else :
766+ # TO FIX url versus file
687767 task_commands = [
688- f"/bin/bash -c 'set ; cd $AZ_BATCH_NODE_MOUNTS_DIR/data ; curl -sLO { app_setup_url } ; source { script_name } ; { HPCADVISOR_FUNCTION_SETUP } '"
768+ f"/bin/bash -c 'set ; cd $AZ_BATCH_NODE_MOUNTS_DIR/data ; curl -sLO { appsetupurl } ; source { script_name } ; { HPCADVISOR_FUNCTION_SETUP } '"
689769 ]
690770
691771 log .debug (f"task command: { task_commands } " )
@@ -701,6 +781,7 @@ def create_setup_task(jobid, appsetupurl):
701781 id = task_id ,
702782 user_identity = user ,
703783 command_line = task_commands [0 ],
784+ resource_files = resource_files ,
704785 )
705786
706787 batch_client .task .add (job_id = jobid , task = task )
@@ -793,7 +874,7 @@ def create_compute_task(
793874 anfmountdir = env ["ANFMOUNTDIR" ]
794875 if anfenabled :
795876 task_commands = [
796- f"/bin/bash -c 'export HOSTLIST_PPN=$(paste -d, -s <(sed \" s/ slots=[0-9]\+/:{ ppn } /\" $HOSTFILE_PATH)); set ; source { anfmountdir } /{ app_run_script } ; cd { fulltaskrundir } ; { HPCADVISOR_FUNCTION_RUN } '" ,
877+ f"/bin/bash -c 'export HOSTLIST_PPN=$(paste -d, -s <(sed \" s/ slots=[0-9]\+/:{ ppn } /\" $HOSTFILE_PATH)); set ; whoami ; source { anfmountdir } /{ app_run_script } ; pwd; ls -l ; cat { anfmountdir } / { app_run_script } ; cd { anfmountdir } ; pwd ; ls -l ; cd { fulltaskrundir } ; { HPCADVISOR_FUNCTION_RUN } '" ,
797878 ]
798879 else :
799880 task_commands = [
0 commit comments