Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] ensure shape in the safe_embedding_lookup #472

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/make_wheel_Linux_x86.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ if [[ "$TF_VERSION" =~ ^2\.1[3-9]\.[0-9]$ ]] ; then
export PROTOBUF_VERSION='4.23.4'
fi

docker system prune -f

DOCKER_BUILDKIT=1 docker build --no-cache \
-f tools/docker/build_wheel.Dockerfile \
--output type=local,dest=wheelhouse \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,62 @@
try:
from tf_keras.initializers import Zeros
from tf_keras.optimizers import Adam
from tf_keras import Input, Model, layers
except:
from tensorflow.keras.initializers import Zeros
from tensorflow.keras import Input, Model, layers
try:
from tensorflow.keras.optimizers import Adam
except:
from tensorflow.keras.legacy.optimizers import Adam


class MyModel(layers.Layer):
  """Minimal Keras layer exercising a dynamic-embedding safe sparse lookup.

  Owns a `de.get_variable` embedding table; on call it assembles a
  3-D SparseTensor of ids (dense shape fixed at [2, 3, 4]) and runs it
  through `de.safe_embedding_lookup_sparse` inside a `tf.function`.
  """

  def __init__(self):
    super().__init__()
    # Dynamic-embedding parameter table that the lookup reads from.
    self.embeddings = de.get_variable(name="p1")

  @tf.function
  def call(self, indices_input, ids_input):
    # Fixed dense shape of the sparse id batch — assumes all supplied
    # indices fall within [2, 3, 4]; TODO(review) confirm with callers.
    dense_shape = tf.constant([2, 3, 4], dtype=tf.int64)
    id_tensor = tf.sparse.SparseTensor(indices_input, ids_input, dense_shape)
    return de.safe_embedding_lookup_sparse(self.embeddings,
                                           id_tensor,
                                           name="safe_sp_emb",
                                           return_trainable=False)


# class MyModel(tf.Module):
# def __init__(self):
# super().__init__()
# # Create the variable as an attribute of the module
# self.embeddings = de.get_variable(name="p1")
#
# @tf.function
# def __call__(self, indices_input, ids_input):
# shape_input = tf.constant([2, 3, 4], dtype=tf.int64)
#
# sparse_ids = tf.sparse.SparseTensor(
# indices=indices_input,
# values=ids_input,
# dense_shape=shape_input
# )
#
# embeddings_result = de.safe_embedding_lookup_sparse(
# self.embeddings,
# sparse_ids,
# name="safe_sp_emb",
# return_trainable=False
# )
# return embeddings_result


# pylint: disable=missing-class-docstring
# pylint: disable=missing-function-docstring
def _type_converter(tf_type):
Expand Down Expand Up @@ -567,6 +615,22 @@ def test_scope_reuse_sparse_embedding_lookup(self):
self.assertAllEqual(p1_reuse._tables[0].name, "test_p1_mht_1of1")
self.assertAllEqual(p2._tables[0].name, "test_p2_mht_1of1")

def test_keras_input_safe_sparse_embedding_lookup(self):
  """safe_embedding_lookup_sparse through a Keras layer under tf.function.

  Regression test for shape handling in `safe_embedding_lookup_sparse`:
  builds a 3-D SparseTensor of ids (dense shape [2, 3, 4]) and pushes it
  through `MyModel`, which performs the lookup inside a `tf.function`.
  """
  indices_data = tf.constant([
      [0, 0, 0],
      [0, 0, 1],
      [0, 0, 2],
      [0, 1, 0],
      [1, 0, 0],
      [1, 1, 0],
      [1, 1, 1],
  ],
                             name="indices_data",
                             dtype=tf.int64)
  ids_data = tf.constant([0, 1, -1, -1, 2, 0, 1], name="ids", dtype=tf.int64)
  model = MyModel()
  # Actually exercise the lookup: a test that only constructs the model
  # asserts nothing about the fix under review.
  output = model(indices_data, ids_data)
  # The last sparse dimension is combined away, so the result keeps the
  # leading dense dims [2, 3] — this is the shape guarantee under test.
  self.assertAllEqual(output.shape[:2], [2, 3])

def test_scope_reuse_safe_sparse_embedding_lookup(self):
indices = [
[0, 0, 0],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.keras import layers
try:
from tensorflow.keras import layers
except:
from tf_keras import layers
from tensorflow.python.keras import optimizer_v2
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from tensorflow_recommenders_addons.utils.check_platform import is_windows, is_macos, is_arm64, is_linux, is_raspi_arm

from tensorflow.core.protobuf import config_pb2
from tensorflow.keras import layers
try:
from tensorflow.keras import layers
except:
from tf_keras import layers

from tensorflow.python.client import session
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def verify_embedding_weights(self, sparse_ids, sparse_weights=None):
def embedding_lookup(self,
ids,
name=None,
max_norm=None) -> (tf.Tensor, EmbeddingWeights):
max_norm=None,
return_trainable=False) -> (tf.Tensor, EmbeddingWeights):
raise NotImplementedError("embedding_lookup is not supported in "
"DistributedVariableWrapper")
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,17 @@ def embedding_lookup_unique(params,
ids_flat = array_ops.reshape(ids, math_ops.reduce_prod(shape,
keepdims=True))
unique_ids, idx = array_ops.unique(ids_flat)
unique_embeddings, trainable_ = de.embedding_lookup(
params,
unique_ids,
partition_strategy=partition_strategy,
name=name,
validate_indices=None,
max_norm=validate_indices,
return_trainable=True)
result = de.embedding_lookup(params,
unique_ids,
partition_strategy=partition_strategy,
name=name,
validate_indices=None,
max_norm=validate_indices,
return_trainable=return_trainable)
if return_trainable:
unique_embeddings, trainable_ = result
else:
unique_embeddings = result
embeddings_flat = array_ops.gather(unique_embeddings, idx)
embeddings_shape = array_ops.concat(
[shape, array_ops.shape(unique_embeddings)[1:]], 0)
Expand Down Expand Up @@ -216,9 +219,12 @@ def embedding_lookup_sparse(

ids = sp_ids.values
ids, idx = array_ops.unique(ids)
embeddings, trainable_ = params.embedding_lookup(ids,
name=name,
max_norm=max_norm)
embeddings = params.embedding_lookup(ids,
name=name,
max_norm=max_norm,
return_trainable=return_trainable)
if return_trainable:
embeddings, trainable_ = embeddings

if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
embeddings = math_ops.cast(embeddings, dtypes.float32)
Expand Down Expand Up @@ -372,16 +378,18 @@ def safe_embedding_lookup_sparse(
if sparse_weights is not None:
sparse_weights, _ = de.math.sparse_fill_empty_rows(sparse_weights, 1.0)

result, trainable_ = embedding_lookup_sparse(
result = embedding_lookup_sparse(
embedding_weights,
sparse_ids,
sparse_weights,
combiner=combiner,
partition_strategy=partition_strategy,
name=name + "/embedding_lookup_sparse",
max_norm=max_norm,
return_trainable=True,
return_trainable=return_trainable,
)
if (return_trainable):
result, trainable_ = result

if default_id is None:
# Broadcast is_row_empty to the same shape as embedding_lookup_result,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -692,13 +692,14 @@ def verify_embedding_weights(self, sparse_ids, sparse_weights=None):
def embedding_lookup(self,
ids,
name=None,
max_norm=None) -> (tf.Tensor, EmbeddingWeights):
max_norm=None,
return_trainable=False) -> (tf.Tensor, EmbeddingWeights):
return embedding_lookup(
self,
ids,
name=name + '/embedding_lookup',
max_norm=max_norm,
return_trainable=True,
return_trainable=return_trainable,
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ def verify_embedding_weights(self, sparse_ids, sparse_weights=None):
def embedding_lookup(self,
ids,
name=None,
max_norm=None) -> (tf.Tensor, "EmbeddingWeights"):
max_norm=None,
return_trainable=False
) -> (tf.Tensor, "EmbeddingWeights"):
"""
embedding lookup, and store the result. No by-product will
be introduced in this call. So it can be decorated by `tf.function`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _embedding_lookup_sparse_impl(
segment_ids = ops.convert_to_tensor(segment_ids, name="segment_ids")

ids, idx = array_ops.unique(ids)
embeddings, _ = params.embedding_lookup(ids, name=name)
embeddings = params.embedding_lookup(ids, name=name)
if not ignore_weights:
if segment_ids.dtype != dtypes.int32:
segment_ids = math_ops.cast(segment_ids, dtypes.int32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,12 @@ def verify_embedding_weights(self, sparse_ids, sparse_weights=None):
def embedding_lookup(self,
ids,
name=None,
max_norm=None) -> (tf.Tensor, EmbeddingWeights):
return embedding_lookup(self, ids, name), self
max_norm=None,
return_trainable=False) -> (tf.Tensor, EmbeddingWeights):
if return_trainable:
return embedding_lookup(self, ids, name, False), self
else:
return embedding_lookup(self, ids, name)

def prefetch_values(self, update=False):
if self.params.bp_v2:
Expand Down Expand Up @@ -443,5 +447,9 @@ def __alltoall_embedding_lookup__(self, ids):
def embedding_lookup(self,
ids,
name=None,
max_norm=None) -> (tf.Tensor, EmbeddingWeights):
return self.__alltoall_embedding_lookup__(ids), self.shadow
max_norm=None,
return_trainable=False) -> (tf.Tensor, EmbeddingWeights):
if return_trainable:
return self.__alltoall_embedding_lookup__(ids), self.shadow
else:
return self.__alltoall_embedding_lookup__(ids)
9 changes: 6 additions & 3 deletions tools/testing/build_and_run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ if [ "$TF_VERSION" = "2.6.3" ] ; then
python -m pip install numpy==1.19.5 --force-reinstall
fi

bazel clean --expunge

# Avoid SystemError: initialization of _pywrap_checkpoint_reader raised unreported exception
pip install tensorflow==$TF_VERSION

Expand Down Expand Up @@ -79,8 +81,9 @@ fi

python -m pytest -v -s --functions-durations=20 --modules-durations=5 $IGNORE_HKV $SKIP_CUSTOM_OP_TESTS_FLAG $EXTRA_ARGS ./tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/



# Release disk space
bazel clean --expunge
rm -f ./tensorflow_recommenders_addons/dynamic_embedding/core/_*_ops.so
sudo rm -f ./tensorflow_recommenders_addons/dynamic_embedding/core/_*_ops.so
sudo rm -rf /tmp/*
apt-get clean && rm -rf /var/lib/apt/lists/*
sudo rm -rf /var/log/*
Loading