Fix flakes in masked lm testing by removing any indeterminism (keras-…
mattdangerw authored Jul 20, 2023
1 parent 32ff68d commit 85d7c53
Showing 13 changed files with 34 additions and 14 deletions.
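The fix is the same in every file below: each masked LM preprocessor used in the tests is configured so that no random draw is left in the masking step. Three parameters are pinned: mask_selection_rate=1.0 selects every candidate token, mask_token_rate=1.0 together with random_token_rate=0.0 replaces every selected token with the mask token (never a random or unchanged one), and mask_selection_length is raised to match sequence_length=5 so a too-small cap no longer forces a random choice of which selections to keep. A minimal sketch of the resulting setup (sketch only, not part of the commit; the parameter names are the real preprocessor arguments, the toy vocabulary is adapted from the BERT test setup):

    # Sketch of the deterministic configuration this commit applies across
    # the test suites; vocabulary and input are illustrative.
    from keras_nlp.models import BertMaskedLMPreprocessor, BertTokenizer

    vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]
    vocab += ["the", "quick", "brown", "fox", "."]

    preprocessor = BertMaskedLMPreprocessor(
        BertTokenizer(vocabulary=vocab),
        mask_selection_rate=1.0,  # select every candidate token
        mask_token_rate=1.0,      # always substitute the [MASK] token
        random_token_rate=0.0,    # never substitute a random token
        mask_selection_length=5,  # no random truncation of the selections
        sequence_length=5,
    )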
2 changes: 1 addition & 1 deletion keras_nlp/models/albert/albert_masked_lm_preprocessor_test.py
@@ -57,7 +57,7 @@ def setUp(self):

         self.preprocessor = AlbertMaskedLMPreprocessor(
             tokenizer=tokenizer,
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
2 changes: 1 addition & 1 deletion keras_nlp/models/albert/albert_masked_lm_test.py
@@ -60,7 +60,7 @@ def setUp(self):

         self.preprocessor = AlbertMaskedLMPreprocessor(
             tokenizer=tokenizer,
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
2 changes: 1 addition & 1 deletion keras_nlp/models/bert/bert_masked_lm_preprocessor_test.py
@@ -36,7 +36,7 @@ def setUp(self):

         self.preprocessor = BertMaskedLMPreprocessor(
             tokenizer=tokenizer,
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
4 changes: 2 additions & 2 deletions keras_nlp/models/bert/bert_masked_lm_test.py
@@ -35,11 +35,11 @@ def setUp(self):
         self.vocab += ["the", "quick", "brown", "fox", "."]
         self.preprocessor = BertMaskedLMPreprocessor(
             BertTokenizer(vocabulary=self.vocab),
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
-            mask_selection_length=2,
+            mask_selection_length=5,
             sequence_length=5,
         )
         self.backbone = BertBackbone(
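Note the interaction fixed in this hunk: even with the three rates pinned, the old mask_selection_length=2 capped five maskable tokens at two predictions, and which two survived the cap was still a random draw. Setting the cap equal to sequence_length removes that last source of flakiness. A hypothetical determinism check (not part of the commit; reuses the preprocessor from the sketch above):

    # Hypothetical check: two passes over the same input should now agree
    # exactly, since no random draw affects the masking.
    import numpy as np

    x1, y1, w1 = preprocessor(["the quick brown fox ."])
    x2, y2, w2 = preprocessor(["the quick brown fox ."])
    np.testing.assert_array_equal(x1["mask_positions"], x2["mask_positions"])
    np.testing.assert_array_equal(y1, y2)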
2 changes: 1 addition & 1 deletion keras_nlp/models/deberta_v3/deberta_v3_masked_lm_preprocessor_test.py
@@ -53,7 +53,7 @@ def setUp(self):
         self.tokenizer = DebertaV3Tokenizer(proto=self.proto)
         self.preprocessor = DebertaV3MaskedLMPreprocessor(
             tokenizer=self.tokenizer,
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
6 changes: 5 additions & 1 deletion keras_nlp/models/deberta_v3/deberta_v3_masked_lm_test.py
@@ -54,7 +54,11 @@ def setUp(self):
         proto = bytes_io.getvalue()
         self.preprocessor = DebertaV3MaskedLMPreprocessor(
             DebertaV3Tokenizer(proto=proto),
-            mask_selection_length=2,
+            # Simplify our testing by masking every available token.
+            mask_selection_rate=1.0,
+            mask_token_rate=1.0,
+            random_token_rate=0.0,
+            mask_selection_length=5,
             sequence_length=5,
         )
         self.backbone = DebertaV3Backbone(
2 changes: 1 addition & 1 deletion keras_nlp/models/distil_bert/distil_bert_masked_lm_preprocessor_test.py
@@ -38,7 +38,7 @@ def setUp(self):
             tokenizer=DistilBertTokenizer(
                 vocabulary=self.vocab,
             ),
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
6 changes: 5 additions & 1 deletion keras_nlp/models/distil_bert/distil_bert_masked_lm_test.py
@@ -39,8 +39,12 @@ def setUp(self):
         self.vocab += ["the", "quick", "brown", "fox", "."]
         self.preprocessor = DistilBertMaskedLMPreprocessor(
             DistilBertTokenizer(vocabulary=self.vocab),
+            # Simplify our testing by masking every available token.
+            mask_selection_rate=1.0,
+            mask_token_rate=1.0,
+            random_token_rate=0.0,
+            mask_selection_length=5,
             sequence_length=5,
-            mask_selection_length=2,
         )
         self.backbone = DistilBertBackbone(
             vocabulary_size=self.preprocessor.tokenizer.vocabulary_size(),
6 changes: 5 additions & 1 deletion keras_nlp/models/f_net/f_net_masked_lm_test.py
@@ -54,8 +54,12 @@ def setUp(self):
         self.proto = bytes_io.getvalue()
         self.preprocessor = FNetMaskedLMPreprocessor(
             FNetTokenizer(proto=self.proto),
+            # Simplify our testing by masking every available token.
+            mask_selection_rate=1.0,
+            mask_token_rate=1.0,
+            random_token_rate=0.0,
+            mask_selection_length=5,
             sequence_length=5,
-            mask_selection_length=2,
         )
         self.backbone = FNetBackbone(
             vocabulary_size=self.preprocessor.tokenizer.vocabulary_size(),
2 changes: 1 addition & 1 deletion keras_nlp/models/roberta/roberta_masked_lm_preprocessor_test.py
@@ -54,7 +54,7 @@ def setUp(self):
                 vocabulary=vocab,
                 merges=merges,
             ),
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
6 changes: 5 additions & 1 deletion keras_nlp/models/roberta/roberta_masked_lm_test.py
@@ -53,8 +53,12 @@ def setUp(self):
         self.merges = merges
         self.preprocessor = RobertaMaskedLMPreprocessor(
             RobertaTokenizer(vocabulary=self.vocab, merges=self.merges),
+            # Simplify our testing by masking every available token.
+            mask_selection_rate=1.0,
+            mask_token_rate=1.0,
+            random_token_rate=0.0,
+            mask_selection_length=5,
             sequence_length=5,
-            mask_selection_length=2,
         )
         self.backbone = RobertaBackbone(
             vocabulary_size=self.preprocessor.tokenizer.vocabulary_size(),
2 changes: 1 addition & 1 deletion keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm_preprocessor_test.py
@@ -56,7 +56,7 @@ def setUp(self):
         self.tokenizer = XLMRobertaTokenizer(proto=self.proto)
         self.preprocessor = XLMRobertaMaskedLMPreprocessor(
             tokenizer=self.tokenizer,
-            # Simplify out testing by masking every available token.
+            # Simplify our testing by masking every available token.
             mask_selection_rate=1.0,
             mask_token_rate=1.0,
             random_token_rate=0.0,
6 changes: 5 additions & 1 deletion keras_nlp/models/xlm_roberta/xlm_roberta_masked_lm_test.py
@@ -59,8 +59,12 @@ def setUp(self):

         self.preprocessor = XLMRobertaMaskedLMPreprocessor(
             XLMRobertaTokenizer(proto=self.proto),
+            # Simplify our testing by masking every available token.
+            mask_selection_rate=1.0,
+            mask_token_rate=1.0,
+            random_token_rate=0.0,
+            mask_selection_length=5,
             sequence_length=5,
-            mask_selection_length=2,
         )

         self.backbone = XLMRobertaBackbone(
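The DeBERTaV3, DistilBERT, FNet, RoBERTa, and XLM-RoBERTa suites above all receive the identical five added lines, so every masked LM test in the commit now sees the same mask layout on every run. With everything pinned, the preprocessor output is easy to reason about (illustrative only; reuses the preprocessor from the first sketch, and exact ids depend on the tokenizer):

    # Illustrative peek at the now-deterministic output; exact ids depend
    # on the tokenizer, but the structure no longer varies between runs.
    x, y, sample_weight = preprocessor(["the quick brown fox ."])
    print(x["mask_positions"])  # every candidate position, not a random subset
    print(y)                    # original ids of the masked tokens (the labels)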
