-
Notifications
You must be signed in to change notification settings - Fork 353
178 lines (153 loc) · 7.71 KB
/
ci_gpu.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# CI_GPU: GPU integration tests for Opacus, run on pushes and pull
# requests targeting the main branch.
---
name: CI_GPU

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  # Temporarily disable this test since there is no server including multiple GPUs.
  # unittest_multi_gpu:
  #   runs-on: 4-core-ubuntu-gpu-t4
  #   steps:
  #     - name: Checkout
  #       uses: actions/checkout@v4
  #     - name: Display Python version
  #       run: python3 -c "import sys; print(sys.version)"
  #     - name: Set up Python
  #       uses: actions/setup-python@v5
  #       with:
  #         python-version: '3.x'
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         ./scripts/install_via_pip.sh -c
  #     - name: Run multi-GPU unit tests
  #       run: |
  #         python3 -m unittest opacus.tests.multigpu_gradcheck.GradientComputationTest.test_gradient_correct

  # Runs the MNIST and CIFAR10 example scripts end-to-end on a T4 GPU runner
  # and checks the resulting accuracy falls inside an expected band.
  integrationtest_py39_torch_release_cuda:
    runs-on: 4-core-ubuntu-gpu-t4
    steps:
      - name: Checkout
        # checkout@v2 runs on the deprecated Node 12 runtime; v4 is the
        # currently supported release and a drop-in replacement here.
        uses: actions/checkout@v4
      - name: Set up Python
        # setup-python@v2 is likewise deprecated; v5 is the supported release.
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
      - name: Install dependencies
        # Use `python3 -m pip` consistently so both installs target the
        # interpreter selected by setup-python above.
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install pytest coverage coveralls
          ./scripts/install_via_pip.sh -c
      # Cuda dependency has already been installed when installing PyTorch, so no need to re-install it.
      # https://discuss.pytorch.org/t/should-i-install-the-extra-cudatoolkit-and-cudnn/194528
      # Cuda installation guide: https://medium.com/@milistu/how-to-install-cuda-cudnn-7e4a00ae4f44
      # - name: Install CUDA toolkit and cuDNN
      #   run: |
      #     sudo apt-get update
      #     sudo apt-get install -y --no-install-recommends \
      #       cuda-toolkit-11-1 \
      #       libcudnn8=8.1.1.33-1+cuda11.1 \
      #       libcudnn8-dev=8.1.1.33-1+cuda11.1
      - name: Run MNIST integration test (CUDA)
        # Fails fast if CUDA is unavailable, then trains one epoch and
        # asserts accuracy lands in (0.78, 0.95).
        # NOTE(review): torch.load reads a checkpoint produced by the
        # preceding command in this same step, so the input is trusted.
        run: |
          nvidia-smi
          mkdir -p runs/mnist/data
          mkdir -p runs/mnist/test-reports
          python -c "import torch; exit(0) if torch.cuda.is_available() else exit(1)"
          python examples/mnist.py --lr 0.25 --sigma 0.7 -c 1.5 --batch-size 64 --epochs 1 --data-root runs/mnist/data --n-runs 1 --device cuda
          python -c "import torch; accuracy = torch.load('run_results_mnist_0.25_0.7_1.5_64_1.pt'); exit(0) if (accuracy[0]>0.78 and accuracy[0]<0.95) else exit(1)"
      - name: Store MNIST test results
        uses: actions/upload-artifact@v4
        with:
          name: mnist-gpu-reports
          path: runs/mnist/test-reports
      - name: Run CIFAR10 integration test (CUDA)
        # Trains twice (default and no_op grad-sample modes) and asserts
        # best accuracy lands in (0.4, 0.49) both times.
        run: |
          mkdir -p runs/cifar10/data
          mkdir -p runs/cifar10/logs
          mkdir -p runs/cifar10/test-reports
          python3 -m pip install tensorboard
          python examples/cifar10.py --lr 0.1 --sigma 1.5 -c 10 --batch-size 2000 --epochs 10 --data-root runs/cifar10/data --log-dir runs/cifar10/logs --device cuda
          python -c "import torch; model = torch.load('model_best.pth.tar'); exit(0) if (model['best_acc1']>0.4 and model['best_acc1']<0.49) else exit(1)"
          python examples/cifar10.py --lr 0.1 --sigma 1.5 -c 10 --batch-size 2000 --epochs 10 --data-root runs/cifar10/data --log-dir runs/cifar10/logs --device cuda --grad_sample_mode no_op
          python -c "import torch; model = torch.load('model_best.pth.tar'); exit(0) if (model['best_acc1']>0.4 and model['best_acc1']<0.49) else exit(1)"
      - name: Store CIFAR10 test results
        uses: actions/upload-artifact@v4
        with:
          name: cifar10-gpu-reports
          path: runs/cifar10/test-reports
      # To save resources, there is no need to run all the tests.
      # - name: Run IMDb integration test (CUDA)
      #   run: |
      #     mkdir -p runs/imdb/data
      #     mkdir -p runs/imdb/test-reports
      #     pip install --user datasets transformers
      #     python examples/imdb.py --lr 0.02 --sigma 1.0 -c 1.0 --batch-size 64 --max-sequence-length 256 --epochs 2 --data-root runs/imdb/data --device cuda
      #     python -c "import torch; accuracy = torch.load('run_results_imdb_classification.pt'); exit(0) if (accuracy>0.54 and accuracy<0.66) else exit(1)"
      # - name: Store IMDb test results
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: imdb-gpu-reports
      #     path: runs/imdb/test-reports
      # - name: Run charlstm integration test (CUDA)
      #   run: |
      #     mkdir -p runs/charlstm/data
      #     wget https://download.pytorch.org/tutorial/data.zip -O runs/charlstm/data/data.zip
      #     unzip runs/charlstm/data/data.zip -d runs/charlstm/data
      #     rm runs/charlstm/data/data.zip
      #     mkdir -p runs/charlstm/test-reports
      #     pip install scikit-learn
      #     python examples/char-lstm-classification.py --epochs=20 --learning-rate=2.0 --hidden-size=128 --delta=8e-5 --batch-size 400 --n-layers=1 --sigma=1.0 --max-per-sample-grad-norm=1.5 --data-root="runs/charlstm/data/data/names/" --device cuda --test-every 5
      #     python -c "import torch; accuracy = torch.load('run_results_chr_lstm_classification.pt'); exit(0) if (accuracy>0.60 and accuracy<0.80) else exit(1)"
      # - name: Store test results
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: charlstm-gpu-reports
      #     path: runs/charlstm/test-reports

  # We will have new benchmarks for Ghost Clipping.
  # micro_benchmarks_py39_torch_release_cuda:
  #   runs-on: ubuntu-latest
  #   needs: [integrationtest_py39_torch_release_cuda]
  #   container:
  #     # https://hub.docker.com/r/nvidia/cuda
  #     image: nvidia/cuda:12.3.1-base-ubuntu22.04
  #     options: --gpus all
  #   env:
  #     TZ: 'UTC'
  #   steps:
  #     - name: Checkout
  #       uses: actions/checkout@v4
  #     - name: Set up Python
  #       uses: actions/setup-python@v5
  #       with:
  #         python-version: 3.9
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install pytest coverage coveralls
  #         ./scripts/install_via_pip.sh
  #     - name: Install CUDA toolkit and cuDNN
  #       run: |
  #         apt-get update
  #         apt-get install -y --no-install-recommends \
  #           cuda-toolkit-11-1 \
  #           libcudnn8=8.1.1.33-1+cuda11.1 \
  #           libcudnn8-dev=8.1.1.33-1+cuda11.1
  #     - name: Run benchmark integration tests (CUDA)
  #       run: |
  #         mkdir -p benchmarks/results/raw
  #         python benchmarks/run_benchmarks.py --batch_size 16 --layers "groupnorm instancenorm layernorm" --config_file ./benchmarks/config.json --root ./benchmarks/results/raw/ --cont
  #         IFS=$' ';layers=("groupnorm" "instancenorm" "layernorm"); rm -rf /tmp/report_layers; mkdir -p /tmp/report_layers; IFS=$'\n'; files=`( echo "${layers[*]}" ) | sed 's/.*/.\/benchmarks\/results\/raw\/&*/'`
  #         cp -v ${files[@]} /tmp/report_layers
  #         report_id=`IFS=$'-'; echo "${layers[*]}"`
  #         python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.csv --format csv
  #         python benchmarks/generate_report.py --path-to-results /tmp/report_layers --save-path benchmarks/results/report-${report_id}.pkl --format pkl
  #         python benchmarks/check_threshold.py --report-path "./benchmarks/results/report-"$report_id".pkl" --metric runtime --threshold 3.0 --column "hooks/baseline"
  #         python benchmarks/check_threshold.py --report-path "./benchmarks/results/report-"$report_id".pkl" --metric memory --threshold 1.6 --column "hooks/baseline"
  #     - name: Store artifacts
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: benchmarks-reports
  #         path: benchmarks/results/