Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Commit b98f7b4

Browse files
authored
Merge pull request #91 from microsoft/mabou/patch
Mabou/patch
2 parents 714679c + df7730f commit b98f7b4

10 files changed

Lines changed: 92 additions & 167 deletions

.ci/azure-pipelines-v2.yml

Lines changed: 30 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,134 +1,30 @@
1-
# MLHyperparameterTuning Pipeline
2-
3-
trigger: none
4-
5-
variables:
6-
BuildConfiguration: Release
7-
BuildBinariesDirectory: $(Build.BinariesDirectory)
8-
BuildPlatform: any cpu
9-
DotNetCoreBuildVersion: 2.2.108
10-
DotNetRuntimeTarget: ubuntu.18.04-x64
11-
AgentToolsDirectory: $(Agent.ToolsDirectory)
12-
CloudPlatform: AzureCloud
13-
ProductName: Trident
14-
TridentWorkloadType: $(WorkloadType)
15-
TridentWorkloadTypeShort: $(WorkloadTypeShort)
16-
DeployLocation: eastus
17-
Agent: agce-ai
18-
azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
19-
azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
20-
21-
jobs:
22-
- job: MLHyperparameterTuningJob
23-
timeoutInMinutes: 300
24-
cancelTimeoutInMinutes: 2
25-
pool:
26-
vmImage: 'Ubuntu-16.04'
27-
28-
steps:
29-
- bash: |
30-
source /usr/share/miniconda/etc/profile.d/conda.sh
31-
which conda
32-
conda env create -f environment.yml
33-
conda env list
34-
conda activate MLHyperparameterTuning
35-
conda env list
36-
echo Login Azure Account
37-
az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
38-
echo Try and figure out what account set takes
39-
az account set -h
40-
echo Try and set it.
41-
az account set --subscription $(subscriptionid)
42-
# papermill 01_Data_Prep.ipynb 01_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
43-
displayName: 'Configuration'
44-
45-
- bash: |
46-
source /usr/share/miniconda/etc/profile.d/conda.sh
47-
conda activate MLHyperparameterTuning
48-
echo Executing 00_Data_Prep.ipynb
49-
papermill 00_Data_Prep.ipynb 00_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
50-
displayName: '00_Data_Prep.ipynb'
51-
52-
- bash: |
53-
source /usr/share/miniconda/etc/profile.d/conda.sh
54-
conda activate MLHyperparameterTuning
55-
echo Executing 01_Training_Script.ipynb
56-
papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3
57-
displayName: '01_Training_Script.ipynb'
58-
59-
- bash: |
60-
source /usr/share/miniconda/etc/profile.d/conda.sh
61-
conda activate MLHyperparameterTuning
62-
echo Executing 02_Testing_Script.ipynb
63-
papermill 02_Testing_Script.ipynb 02_Testing_Script_Output.ipynb --log-output --no-progress-bar -k python3
64-
displayName: '02_Testing_Script.ipynb'
65-
66-
- bash: |
67-
source /usr/share/miniconda/etc/profile.d/conda.sh
68-
conda activate MLHyperparameterTuning
69-
echo Executing 03_Run_Locally.ipynb
70-
papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p selected_subscription $(subscriptionid) -p resource_group $(azurergname)
71-
displayName: '03_Run_Locally.ipynb'
72-
73-
- bash: |
74-
source /usr/share/miniconda/etc/profile.d/conda.sh
75-
conda activate MLHyperparameterTuning
76-
echo Executing 04_Hyperparameter_Random_Search.ipynb
77-
papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
78-
displayName: '04_Hyperparameter_Random_Search.ipynb'
79-
80-
- bash: |
81-
source /usr/share/miniconda/etc/profile.d/conda.sh
82-
conda activate MLHyperparameterTuning
83-
echo Executing 05_Train_Best_Model.ipynb
84-
papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
85-
displayName: '05_Train_Best_Model.ipynb'
86-
87-
- bash: |
88-
source /usr/share/miniconda/etc/profile.d/conda.sh
89-
conda activate MLHyperparameterTuning
90-
echo Executing 06_Test_Best_Model.ipynb
91-
papermill 06_Test_Best_Model.ipynb 06_Test_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
92-
displayName: '06_Test_Best_Model.ipynb'
93-
94-
- bash: |
95-
source /usr/share/miniconda/etc/profile.d/conda.sh
96-
conda activate MLHyperparameterTuning
97-
echo Executing 07_Train_With_AML_Pipeline.ipynb
98-
papermill 07_Train_With_AML_Pipeline.ipynb 07_Train_With_AML_Pipeline_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
99-
displayName: '07_Train_With_AML_Pipeline.ipynb'
100-
101-
- bash: |
102-
source /usr/share/miniconda/etc/profile.d/conda.sh
103-
conda activate MLHyperparameterTuning
104-
echo Executing 08_Tear_Down.ipynb
105-
papermill 08_Tear_Down.ipynb 08_Tear_Down_Output.ipynb --log-output --no-progress-bar -k python3
106-
displayName: '08_Tear_Down.ipynb'
107-
108-
- bash: |
109-
source /usr/share/miniconda/etc/profile.d/conda.sh
110-
conda activate MLHyperparameterTuning
111-
echo Execute Resource Group Delete
112-
existResponse=$(az group exists -n $(azurergname))
113-
if [ "$existResponse" == "true" ]; then
114-
echo Deleting project resource group
115-
az group delete --name $(azurergname) --yes
116-
else
117-
echo Project resource group did not exist
118-
fi
119-
echo Done Cleanup
120-
displayName: 'Backup Cleanup'
121-
condition: or(canceled(),failed())
122-
123-
- task: CreateWorkItem@1
124-
inputs:
125-
workItemType: 'Issue'
126-
title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
127-
assignedTo: 'Mario Bourgoin <mabou@microsoft.com>'
128-
associate: true
129-
teamProject: $(System.TeamProject)
130-
131-
fieldMappings: |
132-
Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
133-
displayName: 'Create work item on failure'
134-
condition: failed()
1+
# MLHyperparameterTuning Pipeline
2+
#
3+
# A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
4+
5+
resources:
6+
repositories:
7+
- repository: aitemplates
8+
type: github
9+
name: microsoft/AI
10+
endpoint: AIArchitecturesAndPractices-GitHub
11+
12+
trigger:
13+
branches:
14+
include:
15+
- master
16+
- mabou/instrument
17+
18+
pr:
19+
autoCancel: true
20+
branches:
21+
include:
22+
- master
23+
- mabou/instrument
24+
25+
stages:
26+
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
27+
parameters:
28+
jobDisplayName: MLScoreDeployJob
29+
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
30+
workload_vars: ../vars/mlhyperparametertuning_vars.yml

.ci/vars/agce_devops_sub_vars.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
variables:
2+
azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
3+
azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)

.ci/vars/mlhyperparametertuning_vars.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
variables:
2+
DeploymentName: MLScoreDeployJob
3+
TridentWorkloadTypeShort: aimlscore
4+
DeployLocation: eastus
5+
ProjectLocation: "."
6+
PythonPath: "."
7+
Template: MLTrainDeployAMLJob.yml

01_Training_Script.ipynb

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,27 @@
440440
"metadata": {},
441441
"source": [
442442
"## Run the script to see that it works <a id='run'></a>\n",
443-
"This should take around ten minutes."
443+
"Set the effort expended to train the classifier."
444+
]
445+
},
446+
{
447+
"cell_type": "code",
448+
"execution_count": null,
449+
"metadata": {
450+
"tags": [
451+
"parameters"
452+
]
453+
},
454+
"outputs": [],
455+
"source": [
456+
"estimators = 1000"
457+
]
458+
},
459+
{
460+
"cell_type": "markdown",
461+
"metadata": {},
462+
"source": [
463+
"Run the classifier script. This should take about 10 minutes."
444464
]
445465
},
446466
{
@@ -451,7 +471,7 @@
451471
},
452472
"outputs": [],
453473
"source": [
454-
"%run -t scripts/TrainClassifier.py --estimators 1000 --match 5 --ngrams 2 --min_child_samples 10 --save model"
474+
"%run -t scripts/TrainClassifier.py --estimators $estimators --match 5 --ngrams 2 --min_child_samples 10 --save model"
455475
]
456476
},
457477
{

03_Run_Locally.ipynb

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
"metadata": {},
4040
"source": [
4141
"## Azure subscription <a id='subscription'></a>\n",
42-
"If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources."
42+
"If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources. We also define the number of estimators to use for the local run."
4343
]
4444
},
4545
{
@@ -55,7 +55,8 @@
5555
"subscription_name=\"YOUR_SUBSCRIPTION_NAME\"\n",
5656
"subscription_id=\"YOUR_SUBSCRIPTION_ID\"\n",
5757
"location=\"eastus\"\n",
58-
"resource_group=\"hypetuning\""
58+
"resource_group=\"hypetuning\"\n",
59+
"estimators = 1000"
5960
]
6061
},
6162
{
@@ -179,10 +180,10 @@
179180
"est = Estimator(source_directory=os.path.join('.', 'scripts'), \n",
180181
" entry_script='TrainClassifier.py',\n",
181182
" script_params={'--data-folder': os.path.abspath('.'),\n",
182-
" '--estimators': '1000',\n",
183-
" '--match': '5',\n",
184-
" '--ngrams': '2',\n",
185-
" '--min_child_samples': '10',\n",
183+
" '--estimators': estimators,\n",
184+
" '--match': 5,\n",
185+
" '--ngrams': 2,\n",
186+
" '--min_child_samples': 10,\n",
186187
" \"--save\": \"local_model\"},\n",
187188
" compute_target='local',\n",
188189
" conda_packages=['pandas==0.23.4',\n",

04_Hyperparameter_Random_Search.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@
202202
"cell_type": "markdown",
203203
"metadata": {},
204204
"source": [
205-
"This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`."
205+
"This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`. We also define the number of estimators to use for each run."
206206
]
207207
},
208208
{
@@ -215,7 +215,8 @@
215215
},
216216
"outputs": [],
217217
"source": [
218-
"max_total_runs = 96"
218+
"max_total_runs = 96\n",
219+
"estimators = 1000"
219220
]
220221
},
221222
{
@@ -270,7 +271,7 @@
270271
"estimator = Estimator(source_directory=os.path.join('.', 'scripts'),\n",
271272
" entry_script='TrainClassifier.py',\n",
272273
" script_params={'--data-folder': ds.as_mount(),\n",
273-
" '--estimators': 1000},\n",
274+
" '--estimators': estimators},\n",
274275
" compute_target=compute_target,\n",
275276
" conda_packages=['pandas==0.23.4',\n",
276277
" 'scikit-learn==0.21.3',\n",

05_Train_Best_Model.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@
166166
},
167167
"outputs": [],
168168
"source": [
169-
"model_estimators = 8 * int(best_parameters['--estimators'])\n",
170-
"model_estimators"
169+
"estimators = 8 * int(best_parameters['--estimators'])\n",
170+
"estimators"
171171
]
172172
},
173173
{
@@ -186,7 +186,7 @@
186186
"ds = ws.get_default_datastore()\n",
187187
"model_parameters = best_parameters.copy()\n",
188188
"model_parameters['--data-folder'] = ds.as_mount()\n",
189-
"model_parameters['--estimators'] = model_estimators\n",
189+
"model_parameters['--estimators'] = estimators\n",
190190
"model_parameters['--save'] = 'FAQ_ranker'\n",
191191
"pd.Series(model_parameters, name='Value').to_frame()"
192192
]

07_Train_With_AML_Pipeline.ipynb

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@
292292
"metadata": {},
293293
"source": [
294294
"## Create AML Pipeline Tuning Step <a id='aml_pipeline_tune_step'></a>\n",
295-
"We create a HyperDrive step in the AML pipeline to perform a search for hyperparameters. The `tune_estimators` pipeline parameter that controls the number of estimators used in tuning deliberately has a low default value for the speed of pipeline testing. The `tune_steps_data` output pipeline data is only used to synchronize with the next pipeline step."
295+
"We create a HyperDrive step in the AML pipeline to perform a search for hyperparameters. The `tune_estimators` pipeline parameter that controls the number of estimators used in tuning deliberately has a low default value for the speed of pipeline testing."
296296
]
297297
},
298298
{
@@ -302,15 +302,13 @@
302302
"outputs": [],
303303
"source": [
304304
"tune_step_name=\"tune_model\"\n",
305-
"tune_steps_data = PipelineData(\"tune_steps_data\", datastore=ds)\n",
306305
"tune_estimators = PipelineParameter(name=\"tune_estimators\", default_value=1) # Set to 1000 when running the pipeline.\n",
307306
"tune_step = HyperDriveStep(\n",
308307
" name=tune_step_name,\n",
309308
" hyperdrive_config=hyperdrive_run_config,\n",
310309
" estimator_entry_script_arguments=[\"--data-folder\", data_folder,\n",
311310
" \"--estimators\", tune_estimators],\n",
312311
" inputs=[data_folder],\n",
313-
" outputs=[tune_steps_data],\n",
314312
" allow_reuse=False)"
315313
]
316314
},
@@ -404,7 +402,7 @@
404402
"cell_type": "markdown",
405403
"metadata": {},
406404
"source": [
407-
"Creating PythonScript Step for AML pipeline to get the best run's hyperparameters. The `tune_steps_data` input pipeline data is only used to synchronize with the previous pipeline step."
405+
"Creating PythonScript Step for AML pipeline to get the best run's hyperparameters."
408406
]
409407
},
410408
{
@@ -428,18 +426,18 @@
428426
" arguments=[\"--hd-step\", tune_step_name,\n",
429427
" \"--output-steps-data\", bh_steps_data,\n",
430428
" \"--hyperparameters\", bh_hyperparameters_file],\n",
431-
" inputs=[tune_steps_data],\n",
432429
" outputs=[bh_steps_data],\n",
433430
" runconfig=bh_run_config,\n",
434-
" allow_reuse=False)"
431+
" allow_reuse=False)\n",
432+
"bh_step.run_after(tune_step)"
435433
]
436434
},
437435
{
438436
"cell_type": "markdown",
439437
"metadata": {},
440438
"source": [
441439
"## Create AML Pipeline Best Model Step <a id='aml_pipeline_estimator_step'></a>\n",
442-
"This step passes the hyperparameters file from the previous step to the training script to create the best model. The `best_estimators` pipeline parameter that controls the number of estimators used in getting the best model deliberately has a low default value for the speed of pipeline testing. The `bm_steps_data` output pipeline data is only used to synchronize with the next pipeline step."
440+
"This step passes the hyperparameters file from the previous step to the training script to create the best model. The `best_estimators` pipeline parameter that controls the number of estimators used in getting the best model deliberately has a low default value for the speed of pipeline testing."
443441
]
444442
},
445443
{
@@ -449,7 +447,6 @@
449447
"outputs": [],
450448
"source": [
451449
"bm_step_name=\"best_model\"\n",
452-
"bm_steps_data = PipelineData(\"bm_steps_data\", datastore=ds)\n",
453450
"bm_estimators = PipelineParameter(name=\"best_estimators\", default_value=1) # Set to 8000 when running the pipeline\n",
454451
"bm_estimator = Estimator(source_directory=os.path.join('.', 'scripts'), # Use a new Estimator as a bug workaround\n",
455452
" entry_script='TrainClassifier.py',\n",
@@ -467,7 +464,6 @@
467464
" \"--save\", model_name],\n",
468465
" compute_target=compute_target,\n",
469466
" inputs=[data_folder, bh_steps_data],\n",
470-
" outputs=[bm_steps_data],\n",
471467
" allow_reuse=False)"
472468
]
473469
},
@@ -532,7 +528,7 @@
532528
"cell_type": "markdown",
533529
"metadata": {},
534530
"source": [
535-
"Creating PythonScript Step for AML pipeline to register the best model. The `bm_steps_data` input pipeline data is only used to synchronize with the previous pipeline step."
531+
"Creating PythonScript Step for AML pipeline to register the best model."
536532
]
537533
},
538534
{
@@ -554,9 +550,9 @@
554550
" arguments=[\"--es-step\", bm_step_name,\n",
555551
" \"--outputs\", \"outputs\",\n",
556552
" \"--model-name\", model_name],\n",
557-
" inputs=[bm_steps_data],\n",
558553
" runconfig=rm_run_config,\n",
559-
" allow_reuse=False)"
554+
" allow_reuse=False)\n",
555+
"rm_step.run_after(bm_step)"
560556
]
561557
},
562558
{
@@ -671,7 +667,7 @@
671667
"name": "python",
672668
"nbconvert_exporter": "python",
673669
"pygments_lexer": "ipython3",
674-
"version": "3.7.3"
670+
"version": "3.6.7"
675671
}
676672
},
677673
"nbformat": 4,

0 commit comments

Comments
 (0)