1414 - " master"
1515 - " release/*"
1616
# Pipeline variable `continue` (all three lines are marked as removed in this diff).
# The removed "Skipper" step overwrites it with 0 or 1, and the removed per-step
# `condition:` lines compare it against '1'.
# NOTE(review): the value is rendered as ' 1' with a leading space; this looks like
# a rendering artifact, confirm against the real file.
17- variables :
18- - name : continue
19- value : ' 1'
20-
# Job `testing`: header settings followed by the start of its step list.
# Changes in this diff: the agent pool switches from `intel-hpus` to
# `habana-gaudi-hpus`, and a `container:` section is added.
# The container options name the container `cd-container` and bind-mount the
# host's /usr/bin/docker read-only at /tmp/docker; the "Install Sudo in container"
# step uses both of these.
# NOTE(review): quoted values such as " 10" and " 2" are rendered with a leading
# space; presumably a rendering artifact, confirm against the real file.
2117jobs :
2218 - job : testing
2319 # how long to run the job before automatically cancelling
2420 timeoutInMinutes : " 10"
2521 # how much time to give 'run always even if cancelled tasks' before stopping them
2622 cancelTimeoutInMinutes : " 2"
27- pool : intel-hpus
23+ pool : habana-gaudi-hpus
24+ container :
25+ image : " vault.habana.ai/gaudi-docker/1.5.0/ubuntu20.04/habanalabs/pytorch-installer-1.11.0:latest"
26+ options : " --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host --shm-size=4g --name cd-container -v /usr/bin/docker:/tmp/docker:ro"
2827 workspace :
2928 clean : all
3029
3130 steps :
# "Skipper" step (every line is marked as removed in this diff).
# Lists the paths that differ from origin/master, greps them against FILTER, and
# sets the pipeline variable `continue` to 0 when nothing matches and to 1
# otherwise, using the `##vso[task.setvariable ...]` logging command.
# NOTE(review): `echo $CHANGED_FILES` is unquoted, so the file list is written to
# changed_files.txt as a single line; grep then prints that whole line on any match.
# NOTE(review): FILTER is used with `grep -E`, so `pl_*` is a regex ("pl" followed
# by zero or more underscores), not a glob; `$FILTER` is also unquoted.
32- - bash : |
33- CHANGED_FILES=$(git diff --name-status origin/master -- . | awk '{print $2}')
34- FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
35- echo $CHANGED_FILES > changed_files.txt
36- MATCHES=$(cat changed_files.txt | grep -E $FILTER)
37- echo $MATCHES
38- if [ -z "$MATCHES" ]; then
39- echo "Skip"
40- echo "##vso[task.setvariable variable=continue]0"
41- else
42- echo "Continue"
43- echo "##vso[task.setvariable variable=continue]1"
44- fi
45- displayName: Skipper
# Added step: installs `sudo` inside the job container.
# It runs `/tmp/docker exec` as user 0 (`-u 0`) against the container named
# `cd-container`, then does `apt-get update` and a non-interactive
# `apt-get install sudo` there. /tmp/docker and the container name both come from
# the job's `container.options`.
# NOTE(review): the inner double quotes around --force-confold close and reopen
# the outer `sh -c "..."` string. The shell joins the pieces, so the option reaches
# apt-get unquoted. It works, but single quotes inside would be clearer.
31+ - script : |
32+ /tmp/docker exec -t -u 0 cd-container \
33+ sh -c "apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confold" -y install sudo"
34+ displayName: 'Install Sudo in container (thanks Microsoft!)'
4635
# "Instance HW info" step: installs `hwinfo` and prints a short hardware summary.
# This diff runs the install through `sudo`, adds a Python version printout and a
# pip self-upgrade, and removes the `continue`-based condition.
4736 - bash : |
48- apt-get install -y hwinfo
37+ sudo apt-get install -y hwinfo
4938 hwinfo --short
39+ python --version
40+ sudo pip install pip -U
5041 displayName: 'Instance HW info'
51- condition: eq(variables['continue'], '1')
5242
# "Install dependencies" step.
# Before: one editable install with the `extra` extra plus requirements/pytorch/test.txt.
# After: with `set -e`, it uninstalls any existing `lightning` / `pytorch-lightning`,
# installs `fire`, runs `.actions/assistant.py requirements-prune-pkgs` for
# torch, torchvision and torchtext, installs the package with the `extra` and
# `test` extras, and lists the installed packages.
# NOTE(review): the pruning presumably keeps the torch builds shipped in the
# container image from being replaced; confirm against .actions/assistant.py.
# PACKAGE_NAME and FREEZE_REQUIREMENTS are passed to the script as environment
# variables. The `continue`-based condition is removed.
5343 - bash : |
54- pip install -e .[extra] -r requirements/pytorch/test.txt
44+ set -e
45+ pip --version
46+ sudo pip uninstall -y lightning pytorch-lightning
47+ pip install fire
48+ python .actions/assistant.py requirements-prune-pkgs torch,torchvision,torchtext
49+ pip install ".[extra,test]"
50+ pip list
5551 env:
5652 PACKAGE_NAME: pytorch
5753 FREEZE_REQUIREMENTS: 1
5854 displayName: 'Install dependencies'
59- condition: eq(variables['continue'], '1')
55+
# Added step: runs `hl-smi -L` and checks that `lsmod` output contains `habanalabs`.
# NOTE(review): `grep` exits non-zero when nothing matches, which presumably fails
# the step if the module is not loaded; confirm that is the intended behavior.
56+ - bash : |
57+ hl-smi -L
58+ lsmod | grep habanalabs
59+ displayName: 'Check the driver status'
6060
# Runs accelerators/test_hpu.py with pytest (`-sv --forked`) from tests/tests_pytorch
# and writes a JUnit report to hpu1_test-results.xml.
# This diff only removes the `continue`-based condition.
6161 - bash : |
6262 python -m pytest -sv accelerators/test_hpu.py --forked --junitxml=hpu1_test-results.xml
6363 workingDirectory: tests/tests_pytorch
6464 displayName: 'Single card HPU test'
65- condition: eq(variables['continue'], '1')
6665
# Runs the same test file as the single-card step, with `--hpus 8` added, and
# writes a JUnit report to hpu8_test-results.xml.
# This diff only removes the `continue`-based condition.
6766 - bash : |
6867 python -m pytest -sv accelerators/test_hpu.py --forked --hpus 8 --junitxml=hpu8_test-results.xml
6968 workingDirectory: tests/tests_pytorch
7069 displayName: 'Multi card(8) HPU test'
71- condition: eq(variables['continue'], '1')
7270
# Runs plugins/precision/hpu/test_hpu.py with pytest and `--hmp-bf16`, writing a
# JUnit report to hpu1_precision_test-results.xml.
# NOTE(review): a diff hunk header sits in the middle of the command, so the
# continuation lines between `--hmp-bf16 \` and `--junitxml=...` are not visible
# here; the full argument list must be checked in the real file.
# This diff removes the `continue`-based condition.
7371 - bash : |
7472 python -m pytest -sv plugins/precision/hpu/test_hpu.py --hmp-bf16 \
@@ -77,18 +75,16 @@ jobs:
7775 --junitxml=hpu1_precision_test-results.xml
7876 workingDirectory: tests/tests_pytorch
7977 displayName: 'HPU precision test'
80- condition: eq(variables['continue'], '1')
8178
# Runs the example script pl_hpu/mnist_sample.py from the `examples` directory,
# after appending that directory to PYTHONPATH.
# This diff only removes the `continue`-based condition.
8279 - bash : |
8380 export PYTHONPATH="${PYTHONPATH}:$(pwd)"
8481 python "pl_hpu/mnist_sample.py"
8582 workingDirectory: examples
8683 displayName: 'Testing: HPU examples'
87- condition: eq(variables['continue'], '1')
8884
# Publishes the JUnit reports matching `hpu*_test-results.xml`, the naming used by
# the `--junitxml` outputs of the pytest steps.
# This diff reduces the condition to `succeededOrFailed()`, dropping the check on
# the removed `continue` variable.
# NOTE(review): `$(python.version)` is not defined anywhere in the visible text;
# confirm it is set, otherwise the run title will contain the unexpanded macro.
8985 - task : PublishTestResults@2
9086 inputs :
9187 testResultsFiles : ' hpu*_test-results.xml'
9288 testRunTitle : ' $(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
93- condition : and( succeededOrFailed(), eq(variables['continue'], '1') )
89+ condition : succeededOrFailed()
9490 displayName : ' Publish test results'
0 commit comments