Merge pull request #99 from andreped/tf-fix
Added model wrapper test for tf<2.8 + refactored tests
andreped authored May 11, 2023
2 parents 0bc538c + 783cf71 commit af5dc34
Showing 16 changed files with 576 additions and 232 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/test.yml
@@ -98,3 +98,45 @@ jobs:
pytest -v tests/test_mp_batch_norm.py
pytest -v tests/test_optimizer_distribute.py
pytest -v tests/test_model_distribute.py
  tf-compability:
    needs: build
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-20.04]
        python-version: ["3.6"]
        tf-version: [2.2.0, 2.3.0, 2.4.0, 2.5.0, 2.6.2]

    steps:
      - uses: actions/checkout@v1
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: pip install wheel setuptools flake8 pytest-cov

      - name: Install tensorflow-datasets
        run: |
          pip install tensorflow==${{ matrix.tf-version }} "tensorflow-datasets<=4.8.2"
          pip install "protobuf<=3.20" --force-reinstall

      - name: Download artifact
        uses: actions/download-artifact@master
        with:
          name: "Python wheel"

      - name: Install wheel
        run: pip install --find-links=${{github.workspace}} gradient_accumulator

      - name: Debug pip deps
        run: pip list

      - name: Test library accessibility
        run: python -c "from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer"

      - name: Run tests
        run: pytest -v tests/test_model_expected_result.py
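In short, the new tf-compability job pins an old TensorFlow (2.2.0-2.6.2 on Python 3.6), installs the built wheel, and smoke-tests the public imports before running the expected-result test. A minimal local equivalent of that smoke test (not part of the diff; the environment is whatever you have installed):

    # Mirrors the workflow's "Test library accessibility" step.
    import tensorflow as tf

    from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer

    # The job matrix spans tf 2.2.0-2.6.2; report what this environment resolved.
    print("TensorFlow:", tf.version.VERSION)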
42 changes: 30 additions & 12 deletions tests/test_adaptive_gradient_clipping.py
@@ -1,16 +1,24 @@
+import os
+
import tensorflow as tf
import tensorflow_datasets as tfds
+from tensorflow.keras import mixed_precision
from tensorflow.keras.models import load_model

from gradient_accumulator import GradientAccumulateModel
from gradient_accumulator import unitwise_norm
-from tensorflow.keras import mixed_precision
-import os

from .utils import normalize_img


def test_unitwise_norm():
for i in range(7):
-        x = tf.zeros([1,] * i)
+        x = tf.zeros(
+            [
+                1,
+            ]
+            * i
+        )
try:
unitwise_norm(x)
except ValueError as e:
@@ -22,8 +30,8 @@ def test_unitwise_norm():
def test_train_mnist():
# load dataset
(ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
shuffle_files=True,
as_supervised=True,
with_info=True,
@@ -35,7 +43,7 @@ def test_train_mnist():
# build train pipeline
ds_train = ds_train.map(normalize_img)
ds_train = ds_train.cache()
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(100) # multiplum of 8
ds_train = ds_train.prefetch(1)

@@ -46,14 +54,24 @@ def test_train_mnist():
ds_test = ds_test.prefetch(1)

# create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32, activation='relu'),  # 32 multiplum of 8
-        tf.keras.layers.Dense(10, dtype='float32')  # output not numerically stable with float16
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32, activation="relu"),  # 32 multiplum of 8
+            tf.keras.layers.Dense(
+                10, dtype="float32"
+            ),  # output not numerically stable with float16
+        ]
+    )

# wrap model to use gradient accumulation
-    model = GradientAccumulateModel(accum_steps=4, mixed_precision=False, use_agc=True, inputs=model.input, outputs=model.output)
+    model = GradientAccumulateModel(
+        accum_steps=4,
+        mixed_precision=False,
+        use_agc=True,
+        inputs=model.input,
+        outputs=model.output,
+    )

# need to scale optimizer for mixed precision
opt = tf.keras.optimizers.SGD(1e-2)
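Taken together, the hunks above show the pattern this test exercises: build a plain Keras model, then wrap it with GradientAccumulateModel and switch on adaptive gradient clipping via use_agc=True. A condensed sketch (toy model and optimizer are illustrative; the wrapper call mirrors the diff):

    import tensorflow as tf

    from gradient_accumulator import GradientAccumulateModel

    base = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(10, dtype="float32"),
    ])

    # use_agc=True applies unit-wise adaptive gradient clipping during
    # training (cf. the unitwise_norm import in the test above).
    model = GradientAccumulateModel(
        accum_steps=4,
        mixed_precision=False,
        use_agc=True,
        inputs=base.input,
        outputs=base.output,
    )
    model.compile(
        optimizer=tf.keras.optimizers.SGD(1e-2),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )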
50 changes: 30 additions & 20 deletions tests/test_batch_norm.py
@@ -1,27 +1,33 @@
+import os
+import random as python_random
+
+import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import load_model

from gradient_accumulator import GradientAccumulateModel
from gradient_accumulator.layers import AccumBatchNormalization
-import random as python_random
-import numpy as np
-import os
-from .utils import reset, normalize_img

+from .utils import normalize_img
+from .utils import reset


-def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epochs:int = 3):
+def run_experiment(
+    custom_bn: bool = True, bs: int = 100, accum_steps: int = 1, epochs: int = 3
+):
# load dataset
(ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
shuffle_files=True,
as_supervised=True,
with_info=True,
)

# build train pipeline
ds_train = ds_train.map(normalize_img)
-    ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
+    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(bs)
ds_train = ds_train.prefetch(1)

@@ -39,17 +45,21 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
normalization_layer = tf.keras.layers.Activation("linear")

# create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Flatten(input_shape=(28, 28)),
-        tf.keras.layers.Dense(32),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(10)
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Flatten(input_shape=(28, 28)),
+            tf.keras.layers.Dense(32),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(10),
+        ]
+    )

# wrap model to use gradient accumulation
if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

# compile model
model.compile(
@@ -79,10 +89,10 @@ def run_experiment(custom_bn:bool = True, bs:int = 100, accum_steps:int = 1, epo
def test_compare_bn_layers():
# set seed
reset()

# custom BN without accum
result1 = run_experiment(custom_bn=True, accum_steps=1, epochs=3)[1]

# reset before second run to get "identical" results
reset()

@@ -98,10 +108,10 @@ def test_compare_bn_layers():
def test_compare_accum_bn_expected_result():
# set seed
reset()

# custom BN without accum
result1 = run_experiment(custom_bn=True, accum_steps=4, bs=25)[1]

# reset before second run to get "identical" results
reset()

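run_experiment toggles normalization_layer between the library's AccumBatchNormalization and a no-op linear Activation, so the custom layer can be compared against a fixed baseline with and without accumulation. A condensed sketch of the custom-BN branch (the accum_steps argument is an assumption here; the actual instantiation sits in a collapsed hunk above):

    import tensorflow as tf

    from gradient_accumulator.layers import AccumBatchNormalization

    accum_steps = 4
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(32),
        # stand-in for tf.keras.layers.BatchNormalization that is aware of
        # gradient accumulation (constructor argument assumed)
        AccumBatchNormalization(accum_steps=accum_steps),
        tf.keras.layers.Activation("relu"),
        tf.keras.layers.Dense(10),
    ])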
63 changes: 38 additions & 25 deletions tests/test_bn_convnd.py
@@ -1,11 +1,14 @@
+import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

from gradient_accumulator import GradientAccumulateModel
from gradient_accumulator.layers import AccumBatchNormalization
-import numpy as np


-def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv2d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
# make toy dataset
data = np.random.randint(2, size=(16, 8, 8, 1))
gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -19,20 +22,24 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
normalization_layer = tf.keras.layers.Activation("linear")

# create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv2D(4, 3, input_shape=(8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

# wrap model to use gradient accumulation
if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

# compile model
model.compile(
@@ -60,7 +67,9 @@ def test_bn_conv2d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
return result


-def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
+def test_bn_conv3d(
+    custom_bn: bool = True, accum_steps: int = 1, epochs: int = 1
+):
# make toy dataset
data = np.random.randint(2, size=(16, 8, 8, 8, 1))
gt = np.expand_dims(np.random.randint(2, size=16), axis=-1)
@@ -74,20 +83,24 @@ def test_bn_conv3d(custom_bn:bool = True, accum_steps:int = 1, epochs:int = 1):
normalization_layer = tf.keras.layers.Activation("linear")

# create model
-    model = tf.keras.models.Sequential([
-        tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
-        normalization_layer,
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Flatten(),
-        tf.keras.layers.Dense(4),
-        normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
-        tf.keras.layers.Activation("relu"),
-        tf.keras.layers.Dense(1, activation="sigmoid"),
-    ])
+    model = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Conv3D(4, 3, input_shape=(8, 8, 8, 1)),
+            normalization_layer,
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Flatten(),
+            tf.keras.layers.Dense(4),
+            normalization_layer,  # @TODO: BN before or after ReLU? Leads to different performance
+            tf.keras.layers.Activation("relu"),
+            tf.keras.layers.Dense(1, activation="sigmoid"),
+        ]
+    )

# wrap model to use gradient accumulation
if accum_steps > 1:
-        model = GradientAccumulateModel(accum_steps=accum_steps, inputs=model.input, outputs=model.output)
+        model = GradientAccumulateModel(
+            accum_steps=accum_steps, inputs=model.input, outputs=model.output
+        )

# compile model
model.compile(
27 changes: 17 additions & 10 deletions tests/test_expected_result.py
@@ -1,12 +1,17 @@
+import os
+import random as python_random
+
+import numpy as np
import tensorflow as tf
-import random as python_random
-import os
-from .utils import get_opt, normalize_img, reset
import tensorflow_datasets as tfds
from tensorflow.keras.models import load_model
-from gradient_accumulator import GradientAccumulateModel, GradientAccumulateOptimizer

+from gradient_accumulator import GradientAccumulateModel
+from gradient_accumulator import GradientAccumulateOptimizer
+
+from .utils import get_opt
+from .utils import normalize_img
+from .utils import reset

# get current tf minor version
tf_version = int(tf.version.VERSION.split(".")[1])
@@ -15,8 +20,8 @@
def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
# load dataset
(ds_train, ds_test), ds_info = tfds.load(
-        'mnist',
-        split=['train', 'test'],
+        "mnist",
+        split=["train", "test"],
shuffle_files=True,
as_supervised=True,
with_info=True,
@@ -35,7 +40,7 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
# create model
input = tf.keras.layers.Input(shape=(28, 28))
x = tf.keras.layers.Flatten(input_shape=(28, 28))(input)
-    x = tf.keras.layers.Dense(128, activation='relu')(x)
+    x = tf.keras.layers.Dense(128, activation="relu")(x)
output = tf.keras.layers.Dense(10)(x)

opt = get_opt(opt_name="SGD", tf_version=tf_version)
@@ -45,14 +50,16 @@ def run_experiment(bs=50, accum_steps=2, epochs=1, modeloropt="opt"):
else:
if modeloropt == "model":
# wrap model to use gradient accumulation
-            model = GradientAccumulateModel(accum_steps=accum_steps, inputs=input, outputs=output)
+            model = GradientAccumulateModel(
+                accum_steps=accum_steps, inputs=input, outputs=output
+            )
else:
# wrap optimizer to use gradient accumulation
opt = GradientAccumulateOptimizer(opt, accum_steps=accum_steps)

# compile model
model = tf.keras.Model(inputs=input, outputs=output)

# compile model
model.compile(
optimizer=opt,
@@ -91,7 +98,7 @@ def test_expected_result():

# run again with different batch size and number of accumulations
result2 = run_experiment(bs=50, accum_steps=2, epochs=2, modeloropt="opt")

# reset again
reset()

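This file covers both integration styles side by side: wrapping the model (GradientAccumulateModel) versus wrapping the optimizer (GradientAccumulateOptimizer). A condensed sketch of the optimizer route (toy model and SGD are illustrative; the wrapper call mirrors the diff, and the real test fetches its optimizer through get_opt to smooth over TF-version differences):

    import tensorflow as tf

    from gradient_accumulator import GradientAccumulateOptimizer

    inp = tf.keras.layers.Input(shape=(28, 28))
    x = tf.keras.layers.Flatten()(inp)
    x = tf.keras.layers.Dense(128, activation="relu")(x)
    out = tf.keras.layers.Dense(10)(x)
    model = tf.keras.Model(inputs=inp, outputs=out)

    # two accumulation steps at batch size 50 approximate a batch size of 100
    opt = GradientAccumulateOptimizer(tf.keras.optimizers.SGD(1e-2), accum_steps=2)

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )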
