Convert directory fbcode/deeplearning to use the Ruff Formatter
Summary:
Converts the directory specified to use the Ruff formatter in pyfmt

ruff_dog

If this diff causes merge conflicts when rebasing, please run
`hg status -n -0 --change . -I '**/*.{py,pyi}' | xargs -0 arc pyfmt`
on your diff and amend any changes before rebasing onto the latest revision.
That should reduce or eliminate most merge conflicts.
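For readers outside Meta's tooling, `hg` is Mercurial and `arc pyfmt` is the internal formatter entry point that now dispatches to Ruff for this directory. Below is a minimal Python sketch of what the one-liner does, assuming both tools are on PATH; outside fbcode, running `ruff format` over the same file list would be the closest open-source equivalent.

```python
# Hypothetical re-implementation of the suggested fix-up command; `hg` and
# `arc pyfmt` are assumed to be available (both are Meta-internal in this form).
import subprocess

# NUL-separated list of .py/.pyi files touched by the current commit.
out = subprocess.run(
    ["hg", "status", "-n", "-0", "--change", ".", "-I", "**/*.{py,pyi}"],
    check=True,
    capture_output=True,
).stdout

paths = [p.decode() for p in out.split(b"\0") if p]
if paths:
    # Reformat just those files, mirroring `xargs -0 arc pyfmt`.
    subprocess.run(["arc", "pyfmt", *paths], check=True)
```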

allow-large-files
bypass-github-export-checks

Differential Revision: D63766623
Thomas Polasek authored and facebook-github-bot committed Oct 11, 2024
1 parent f59d5ee commit 57fa166
Showing 21 changed files with 35 additions and 55 deletions.
1 change: 0 additions & 1 deletion docs/conf.py
@@ -25,7 +25,6 @@
 
 
 def configureDoxyfile(input_dir, output_dir):
-
     with open("Doxyfile.in", "r") as file:
         filedata = file.read()
 
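This first hunk shows the most common mechanical change in the diff: the Ruff formatter removes blank lines that sit immediately after a `def`, `class`, `for`, or `if` opener, a convention it shares with Black. A small before/after sketch with invented names:

```python
# Before: a stray blank line right after the signature.
def configure(input_dir, output_dir):

    return f"{input_dir} -> {output_dir}"


# After Ruff formatting: the body starts directly under the signature.
def configure_formatted(input_dir, output_dir):
    return f"{input_dir} -> {output_dir}"


print(configure_formatted("in", "out"))
```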
18 changes: 9 additions & 9 deletions fbgemm_gpu/bench/histogram_binning_calibration_benchmark.py
@@ -178,9 +178,9 @@ def fbgemm_generic_hbc_by_feature_cpu(input: Tensor) -> Tuple[Tensor, Tensor]:
         if step >= warmup_runs:
             total_time["hbc"]["cpu"][data_type] += hbc_time
             total_time["hbc_by_feature"]["cpu"][data_type] += hbc_by_feature_time
-            total_time["generic_hbc_by_feature"]["cpu"][
-                data_type
-            ] += generic_hbc_by_feature_time
+            total_time["generic_hbc_by_feature"]["cpu"][data_type] += (
+                generic_hbc_by_feature_time
+            )
 
     if torch.cuda.is_available():
         bin_num_examples_gpu: Tensor = bin_num_examples.cuda()
@@ -260,12 +260,12 @@ def fbgemm_generic_hbc_by_feature_gpu(
             )
             if step >= warmup_runs:
                 total_time["hbc"]["gpu"][data_type] += hbc_time
-                total_time["hbc_by_feature"]["gpu"][
-                    data_type
-                ] += hbc_by_feature_time
-                total_time["generic_hbc_by_feature"]["gpu"][
-                    data_type
-                ] += generic_hbc_by_feature_time
+                total_time["hbc_by_feature"]["gpu"][data_type] += (
+                    hbc_by_feature_time
+                )
+                total_time["generic_hbc_by_feature"]["gpu"][data_type] += (
+                    generic_hbc_by_feature_time
+                )
 
     for op, curr_items in total_time.items():
         for platform, data_items in curr_items.items():
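The pattern above recurs throughout this file: where the old formatter broke a long augmented assignment inside the subscript, Ruff keeps the subscripted target on one line and wraps only the right-hand side in parentheses. A runnable sketch of the new style, with invented names:

```python
from collections import defaultdict

total_time = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
generic_hbc_by_feature_time = 0.125
data_type = "fp32"

# Ruff-style wrap: the target stays intact and the right-hand side moves to
# its own parenthesized line. (Wrapped here to mirror the diff; Ruff only
# does this when the single-line form would exceed the line-length limit.)
total_time["generic_hbc_by_feature"]["cpu"][data_type] += (
    generic_hbc_by_feature_time
)
print(total_time["generic_hbc_by_feature"]["cpu"][data_type])
```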
5 changes: 3 additions & 2 deletions fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
@@ -586,8 +586,9 @@ def uvm(
     assert (
         use_cache
     ), "--use-cache is required for --no-conflict-misses or all-conflict-misses"
-    assert (no_conflict_misses and not all_conflict_misses) or (
-        not no_conflict_misses and all_conflict_misses
+    assert (
+        (no_conflict_misses and not all_conflict_misses)
+        or (not no_conflict_misses and all_conflict_misses)
     ), "Cannot use both --no-conflict-misses and --all-conflict-misses at the same time!"
     logging.info(
         "Evaluate {}: Cache shape {}".format(
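The `assert` rewrite above follows the same rule: when the condition plus its message exceed the line limit, Ruff parenthesizes the whole condition and gives each clause its own line. A runnable sketch of the new shape (flag values invented; the condition requires the two flags to disagree, i.e. a boolean XOR):

```python
no_conflict_misses = True
all_conflict_misses = False

# Exactly one of the two mutually exclusive flags may be set.
assert (
    (no_conflict_misses and not all_conflict_misses)
    or (not no_conflict_misses and all_conflict_misses)
), "Cannot use both --no-conflict-misses and --all-conflict-misses at the same time!"
```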
1 change: 0 additions & 1 deletion fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
@@ -524,7 +524,6 @@ def nbit_ssd(
     enforce_hbm: bool,
     ssd_cache_loc: str,
 ) -> None:
-
     np.random.seed(42)
     torch.manual_seed(42)
     B = batch_size
6 changes: 4 additions & 2 deletions fbgemm_gpu/experimental/example/test/triton_example_test.py
@@ -15,8 +15,10 @@
 
 @triton.jit
 # fmt: off
-def triton_add_kernel(x_ptr, y_ptr, z_ptr, n_elements, BLOCK_SIZE: tl.constexpr) -> None:
-    # fmt: on # noqa E115
+def triton_add_kernel(
+    x_ptr, y_ptr, z_ptr, n_elements, BLOCK_SIZE: tl.constexpr
+) -> None:
+    # fmt: on # noqa E115
 
     # We use a 1D launch grid so axis is 0.
     pid = tl.program_id(axis=0)
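Interestingly, this hunk rewraps the kernel signature even though it sits between `# fmt: off` and `# fmt: on`; the pragma comments themselves are preserved, but the region is reformatted anyway, which may reflect one of Ruff's documented deviations from Black around suppression comments. For an ordinary statement the pragmas do hold, as in this illustrative sketch:

```python
# fmt: off
IDENTITY = [
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
]
# fmt: on

# Everything outside the pragma pair is formatted normally.
print(sum(row[i] for i, row in enumerate(IDENTITY)))  # -> 3
```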
6 changes: 2 additions & 4 deletions fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
@@ -908,7 +908,6 @@ def _kernel_matmul_fp8_row_tma_persistent(
 
 
 class TmaAutoTuneHelper:
-
     # duck typing wrapper to implement the same interface as TmaDescKernelParam in Triton PR #4498
     class KernelParamWrapper:
         def __init__(self, desc):
@@ -1452,7 +1451,6 @@ def _kernel_matmul_fp8_block_fastacc(
         k_multiple = scale_block_k // BLOCK_K
 
     for k in range(0, tl.cdiv(K, BLOCK_K * SPLIT_K)):
-
         k_remaining = K - k * (BLOCK_K * SPLIT_K)
 
         if EVEN_K:
@@ -2336,8 +2334,8 @@ def triton_quantize_fp8_block(
         torch.Tensor : [M, K] fp8 scaled tensor.
         torch.Tensor: [cdiv(M, block_m), cdiv(K, block_k)] reciprocal scale tensor per block.
     """
-    assert x.device != torch.device(
-        "cpu"
+    assert (
+        x.device != torch.device("cpu")
     ), "Blockwise quantization not support on cpu, please use row-wise quantization instead."
     x_shape = x.shape
     x = x.view(-1, x.size(-1))
2 changes: 1 addition & 1 deletion fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py
@@ -91,7 +91,7 @@ def benchmark(
         self,
         *args,
         bench_quantize: bool = False,
-        use_rotating_buffer_bench: bool = False
+        use_rotating_buffer_bench: bool = False,
     ) -> float:
         """Benchmark runtime of this operator."""
         if bench_quantize:
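The one-character change above is the "magic trailing comma": when a parameter list stays split across lines, Ruff, like Black, appends a trailing comma after the final parameter so that adding another one later touches a single line. A toy stand-in for the signature above:

```python
def benchmark(
    *args,
    bench_quantize: bool = False,
    use_rotating_buffer_bench: bool = False,  # trailing comma added by the formatter
) -> float:
    """Toy stand-in for the benchmark signature shown above."""
    return 0.0


print(benchmark(bench_quantize=True))
```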
1 change: 0 additions & 1 deletion fbgemm_gpu/experimental/gen_ai/test/attention/gqa_test.py
@@ -335,7 +335,6 @@ def mqa_reference(
     q[0][0][0][0] = 1000
     # cache_x_ref is for input to reference implementation
     if dtype in ["fp8", "int4"]:
-
         if dtype == "fp8":
             num_groups = 1
             qparam_offset = 4 * num_groups
@@ -328,7 +328,6 @@ def test_positional_encoding_with_paged_attention(
     B: int,
     BLOCK_N: int,
 ) -> None:
-
     N_H_L = 1
     N_KVH_L = 8
     D_H = 128
4 changes: 1 addition & 3 deletions fbgemm_gpu/fbgemm_gpu/split_embedding_inference_converter.py
@@ -119,9 +119,7 @@ def _process_split_embs(self, model: torch.nn.Module) -> None:
                     f"Embedding dim {D} couldn't be divided by align size {weights_ty.align_size()}!"
                 )
                 assert D % 4 == 0
-                weights_ty = (
-                    SparseType.FP16
-                )  # fall back to FP16 if dimension couldn't be aligned with the required size
+                weights_ty = SparseType.FP16  # fall back to FP16 if dimension couldn't be aligned with the required size
             embedding_specs.append(("", E, D, weights_ty))
 
         weight_lists = []
@@ -2817,9 +2817,7 @@ def _recording_to_timer(
         if self.stats_reporter is not None and self.stats_reporter.should_report(
             self.step
         ):
-            assert (
-                timer
-            ), "We shouldn't be here, async timer must have been initiated if reporter is present."
+            assert timer, "We shouldn't be here, async timer must have been initiated if reporter is present."
             return timer.recording(**kwargs)
         # No-Op context manager
         return contextlib.nullcontext()
11 changes: 5 additions & 6 deletions fbgemm_gpu/fbgemm_gpu/tbe/ssd/training.py
@@ -1436,7 +1436,6 @@ def forward(
         )
 
         if len(self.timesteps_prefetched) == 0:
-
             with self._recording_to_timer(
                 self.ssd_prefetch_read_timer,
                 context=self.step,
@@ -1776,7 +1775,8 @@ def _report_ssd_io_stats(self) -> None:
         this function fetch the stats from EmbeddingRocksDB and report it with stats_reporter
         """
         ssd_io_duration = self.ssd_db.get_rocksdb_io_duration(
-            self.step, self.stats_reporter.report_interval  # pyre-ignore
+            self.step,
+            self.stats_reporter.report_interval,  # pyre-ignore
         )
 
         if len(ssd_io_duration) != 5:
@@ -1878,7 +1878,8 @@ def _report_l2_cache_perf_stats(self) -> None:
             return
 
         l2_cache_perf_stats = self.ssd_db.get_l2cache_perf(
-            self.step, stats_reporter.report_interval  # pyre-ignore
+            self.step,
+            stats_reporter.report_interval,  # pyre-ignore
         )
 
         if len(l2_cache_perf_stats) != 15:
@@ -2001,9 +2002,7 @@ def _recording_to_timer(
         if self.stats_reporter is not None and self.stats_reporter.should_report(
             self.step
         ):
-            assert (
-                timer
-            ), "We shouldn't be here, async timer must have been initiated if reporter is present."
+            assert timer, "We shouldn't be here, async timer must have been initiated if reporter is present."
             return timer.recording(**kwargs)
         # No-Op context manager
         return contextlib.nullcontext()
3 changes: 1 addition & 2 deletions fbgemm_gpu/test/lint/check_meta_header.py
@@ -6,8 +6,7 @@
 # LICENSE file in the root directory of this source tree.
 
 
-"""Check Python source code contains Meta copyright header
-"""
+"""Check Python source code contains Meta copyright header"""
 
 from __future__ import annotations
 
4 changes: 1 addition & 3 deletions fbgemm_gpu/test/quantize/fused_8bit_rowwise_test.py
@@ -271,9 +271,7 @@ def test_quantize_and_dequantize_op(  # noqa: C901
             reference = torch.from_numpy(
                 fused_rowwise_8bit_dequantize_2bytes_padding_scale_bias_first_reference(
                     quantize_data_numpy
-                )[
-                    :, :ncols
-                ]
+                )[:, :ncols]
             )
             if output_dtype == SparseType.FP32:
                 torch.testing.assert_close(
1 change: 0 additions & 1 deletion fbgemm_gpu/test/quantize/mx4_test.py
@@ -145,7 +145,6 @@ def fake_quantize_mx(
 
 # @optests.generate_opcheck_tests()
 class TestMXQuantizationConversion(unittest.TestCase):
-
     @unittest.skipIf(*gpu_unavailable)
     # pyre-fixme[56]:
     @given(
4 changes: 1 addition & 3 deletions fbgemm_gpu/test/tbe/inference/nbit_forward_autovec_test.py
@@ -296,9 +296,7 @@ def execute_nbit_forward_(  # noqa C901
                 scale_shift[:, :] = torch.tensor(
                     # pyre-fixme[61]: `scales` is undefined, or not always defined.
                     # pyre-fixme[61]: `shifts` is undefined, or not always defined.
-                    np.stack([scales, shifts], axis=1)
-                    .astype(np.float16)
-                    .view(np.uint8)
+                    np.stack([scales, shifts], axis=1).astype(np.float16).view(np.uint8)
                 )
 
             fake_quantize_embs(
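This file and the two test files below carry the same rewrite: a fluent NumPy chain that the old formatter stacked one call per line is collapsed onto a single line because it now fits within the default 88-column limit. A runnable sketch of the collapsed form, with invented inputs:

```python
import numpy as np

scales = np.ones(4, dtype=np.float32)
shifts = np.zeros(4, dtype=np.float32)

# Fits in 88 columns, so the formatter keeps the whole chain on one line.
packed = np.stack([scales, shifts], axis=1).astype(np.float16).view(np.uint8)
print(packed.shape)  # (4, 4): two float16 values -> four uint8 bytes per row
```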
4 changes: 1 addition & 3 deletions fbgemm_gpu/test/tbe/inference/nbit_forward_test.py
@@ -523,9 +523,7 @@ def execute_nbit_forward_(  # noqa C901
                 scale_shift[:, :] = torch.tensor(
                     # pyre-fixme[61]: `scales` is undefined, or not always defined.
                     # pyre-fixme[61]: `shifts` is undefined, or not always defined.
-                    np.stack([scales, shifts], axis=1)
-                    .astype(np.float16)
-                    .view(np.uint8)
+                    np.stack([scales, shifts], axis=1).astype(np.float16).view(np.uint8)
                 )
 
             fake_quantize_embs(
4 changes: 1 addition & 3 deletions fbgemm_gpu/test/tbe/ssd/ssd_split_tbe_inference_test.py
@@ -329,9 +329,7 @@ def test_nbit_ssd_cache(
                 scale_shift[:, :] = torch.tensor(
                     # pyre-fixme[61]: `scales` is undefined, or not always defined.
                     # pyre-fixme[61]: `shifts` is undefined, or not always defined.
-                    np.stack([scales, shifts], axis=1)
-                    .astype(np.float16)
-                    .view(np.uint8)
+                    np.stack([scales, shifts], axis=1).astype(np.float16).view(np.uint8)
                 )
 
             D_bytes = rounded_row_size_in_bytes(
5 changes: 1 addition & 4 deletions fbgemm_gpu/test/tbe/ssd/ssd_split_tbe_training_test.py
@@ -925,10 +925,7 @@ def _prefetch(b_it: int) -> int:
                 # pyre-fixme[16]: Optional type has no attribute `float`.
                 optim_state_r.add_(
                     # pyre-fixme[16]: `Optional` has no attribute `float`.
-                    emb_r.weight.grad.float()
-                    .to_dense()
-                    .pow(2)
-                    .mean(dim=1)
+                    emb_r.weight.grad.float().to_dense().pow(2).mean(dim=1)
                 )
                 torch.testing.assert_close(
                     optim_state_t.float(),
1 change: 0 additions & 1 deletion fbgemm_gpu/test/tbe/ssd/ssd_utils_test.py
@@ -263,7 +263,6 @@ def test_scratch_pad_indices_queue(
         for indices, lookup_indices, count, lookup_count in zip(
             all_indices, all_lookup_indices, all_counts, all_lookup_counts
         ):
-
             # Run reference
             # Prepare inputs for the reference run
             sp_prev_curr_map_ref = torch.zeros_like(lookup_indices)
4 changes: 3 additions & 1 deletion fbgemm_gpu/test/test_utils.py
@@ -212,7 +212,9 @@ def use_cpu_strategy() -> st.SearchStrategy[bool]:
         st.booleans()
         if (gpu_available and not TEST_WITH_ROCM)
         # fmt: off
-        else st.just(False) if (gpu_available and TEST_WITH_ROCM) else st.just(True)
+        else st.just(False)
+        if (gpu_available and TEST_WITH_ROCM)
+        else st.just(True)
         # fmt: on
     )
 
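Finally, the chained conditional expression in `use_cpu_strategy` is split one clause per line instead of nesting the whole `else ... if ... else ...` on a single line. A standalone sketch of the same shape, using plain strings instead of hypothesis strategies:

```python
gpu_available = False
TEST_WITH_ROCM = False

# Ruff breaks each arm of the nested conditional onto its own line.
strategy = (
    "booleans"
    if (gpu_available and not TEST_WITH_ROCM)
    else "just_false"
    if (gpu_available and TEST_WITH_ROCM)
    else "just_true"
)
print(strategy)  # -> "just_true"
```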
