Setup pre-commit hooks. (#1)
stellaraccident authored Apr 21, 2024
1 parent daa769e commit ba15477
Showing 14 changed files with 73 additions and 33 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/pre-commit.yaml
@@ -0,0 +1,14 @@
name: pre-commit

on:
pull_request:
push:
branches: [main]

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: pre-commit/[email protected]
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,14 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
1 change: 0 additions & 1 deletion LICENSE
@@ -216,4 +216,3 @@ conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.

11 changes: 10 additions & 1 deletion README.md
@@ -3,9 +3,12 @@
**WARNING: This is an early preview that is in progress. It is not ready for
general use.**

[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)


## Development Getting Started

Use this as a guide to get started developing the project using pinned,
pre-release dependencies. You are welcome to deviate as you see fit, but
these canonical directions mirror what the CI does.

@@ -54,3 +57,9 @@ pip install -e shortfin
pytest sharktank
pytest shortfin
```

### Optional: Pre-commits and developer settings

This project is set up to use the `pre-commit` tooling. To install it in
your local repo, run: `pre-commit install`. After this point, when making
commits locally, hooks will run. See https://pre-commit.com/
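
The same flow can also be scripted; a minimal sketch, assuming the `pre-commit` executable is on `PATH` (the helper below is illustrative, not part of the repository):

```python
# Illustrative helper (not part of the repository): installs the git hooks
# and then runs every configured hook once against the whole tree.
# Assumes the `pre-commit` executable is available on PATH.
import subprocess


def setup_and_run_hooks() -> None:
    subprocess.run(["pre-commit", "install"], check=True)
    subprocess.run(["pre-commit", "run", "--all-files"], check=True)


if __name__ == "__main__":
    setup_and_run_hooks()
```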
6 changes: 5 additions & 1 deletion requirements.txt
@@ -7,4 +7,8 @@ types-requests==2.31.0.20240125

# It is expected that you have installed a PyTorch version/variant specific
# to your needs, so we only include a minimum version spec.
-torch>=2.3.0
+# TODO: Use a versioned release once 2.3.0 drops.
+torch>=2.3.0.dev1

# Used for managing pre-commit flows.
pre-commit
2 changes: 1 addition & 1 deletion sharktank/sharktank/layers/kv_cache.py
@@ -8,7 +8,7 @@
These are not complete abstractions: they are primarily focused on making
tightly coupled transformer blocks a bit less "stringy" with loose tensors
and dims floating around everywhere.
"""

import abc
@@ -39,8 +39,8 @@ util.func private @sharktank_mmt_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{bs}
affine_map<(d0, d1, d2) -> (d0, d1, 0)>,
affine_map<(d0, d1, d2) -> (d0, d1, 0)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"] }}
ins(%d, %m, %qs : !d_tensor_type, !m_tensor_type, !qs_tensor_type)
outs(%b_grouped : !b_grouped_tensor_type) {{
^bb0(%d_element: !scale_type, %m_element: !scale_type, %q_element: !lowp_type, %out: !a_type):
@@ -63,9 +63,9 @@ util.func private @sharktank_mmt_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{bs}(
indexing_maps = [
// d0 = b, d1 = m, d2 = n, d3 = group0 (r), d4 = block (r)
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"] }}
ins(%aexp, %b_grouped_dequant : !aexp_tensor_type, !b_grouped_tensor_type)
outs(%result_fill : !c_tensor_type) {{
^bb0(%a_element: !a_type, %b_element: !a_type, %out: !a_type):
12 changes: 6 additions & 6 deletions sharktank/sharktank/ops/templates/mmt_block_scaled_q8_3d.mlir
@@ -32,9 +32,9 @@ util.func private @sharktank_mmt_block_scaled_q8_3d_{n}_{k}_{bs}_{a_type}(
%b_grouped_dequant = linalg.generic {{
indexing_maps = [
affine_map<(d0, d1, d2) -> (d0, d1, 0)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"] }}
ins(%d, %qs : !d_tensor_type, !qs_tensor_type)
outs(%b_grouped : !b_grouped_tensor_type) {{
^bb0(%d_element: !scale_type, %q_element: !lowp_type, %out: !a_type):
@@ -55,9 +55,9 @@
indexing_maps = [
// d0 = b, d1 = m, d2 = n, d3 = group0 (r), d4 = block (r)
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"] }}
ins(%aexp, %b_grouped_dequant : !aexp_tensor_type, !b_grouped_tensor_type)
outs(%result_fill : !c_tensor_type) {{
^bb0(%a_element: !a_type, %b_element: !a_type, %out: !a_type):
@@ -25,8 +25,8 @@
module {{

util.func private @mmt_super_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{sup_count}_{sub_count}_{bs}_{a_type}(
%a: !a_tensor_type,
%d: !d_tensor_type,
%dmin: !dmin_tensor_type,
%sb_scales_hi_i8: !sb_hi_i8_type,
%sb_scales_low_i8: !sb_low_i8_type,
@@ -59,11 +59,11 @@ util.func private @mmt_super_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{sup_cou
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>, // sb_mins_hi[n, sup, sub]
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>, // sb_mins_low[n, sup, sub]
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> // out b_grouped[n, sup, sub, bs]
],
iterator_types = ["parallel", "parallel", "parallel", "parallel"] }}
ins(
%qs, %d, %dmin, %sb_scales_hi, %sb_scales_low, %sb_mins_hi, %sb_mins_low :
!qs_tensor_type, !d_tensor_type, !dmin_tensor_type,
!sb_hi_i2_type, !sb_low_i4_type, !sb_hi_i2_type, !sb_low_i4_type
)
outs(%b_grouped : !b_grouped_tensor_type) {{
@@ -74,7 +74,7 @@ util.func private @mmt_super_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{sup_cou
%shift_4 = arith.constant 4 : i32
%d_element_ext = arith.extf %d_element : !scale_type to !a_type
%dmin_element_ext = arith.extf %dmin_element : !scale_type to !a_type

// Combine sub-block scale.
%sb_scale_low_i32 = arith.extui %sb_scales_low_element : i4 to i32
%sb_scale_hi_i32 = arith.extui %sb_scales_hi_element : i2 to i32
@@ -111,8 +111,8 @@ util.func private @mmt_super_block_scaled_offset_q4_unsigned_3d_{n}_{k}_{sup_cou
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4, d5)>, // aexp
affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3, d4, d5)>, // b_grouped_dequant
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2)> // out
],
iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "reduction"] }}
ins(%aexp, %b_grouped_dequant : !aexp_tensor_type, !b_grouped_tensor_type)
outs(%result_fill : !c_tensor_type) {{
^bb0(%a_element: !a_type, %b_element: !a_type, %out: !a_type):
4 changes: 2 additions & 2 deletions sharktank/sharktank/ops/templates/mmtfp_2d.mlir
@@ -20,8 +20,8 @@ util.func private @sharktank_mmtfp_2d_{n}_{k}_{a_type}{bT_type}(
%c1 = arith.constant 1 : index
%m = tensor.dim %a, %c0 : !a_tensor_type
%result_empty = tensor.empty(%m) : !c_tensor_type
%result_init = linalg.fill
ins(%zero : {a_type})
outs(%result_empty: !c_tensor_type) -> !c_tensor_type
%result = linalg.matmul_transpose_b
ins (%a, %bT: !a_tensor_type, !bT_tensor_type)
4 changes: 2 additions & 2 deletions sharktank/sharktank/ops/templates/mmtfp_3d.mlir
@@ -34,8 +34,8 @@ util.func private @sharktank_mmtfp_3d_{n}_{k}_{a_type}{bT_type}(
linalg.yield %in : !bT_type
}} -> !bT_broadcast_tensor_type
%result_empty = tensor.empty(%b0, %m) : !c_tensor_type
%result_init = linalg.fill
ins(%zero : !a_type)
outs(%result_empty: !c_tensor_type) -> !c_tensor_type
%result = linalg.batch_matmul_transpose_b
ins (%a, %bT_broadcast: !a_tensor_type, !bT_broadcast_tensor_type)
2 changes: 1 addition & 1 deletion shortfin/shortfin/framework/session.py
@@ -10,7 +10,7 @@
* DeviceSession: A single HAL device and other process-level globals. Shared global
memory and corresponding synchronization handles are accessible from here.
* WorkQueue: Logical stream of execution, nested under the DeviceSession. Each
queue holds a timeline semaphore which sequences invocations. For these models,
we route workloads of vastly different characteristics to distinct queues (i.e.
prefill vs decode step).
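
To make the queue/timeline-semaphore sequencing described above concrete, here is a small self-contained sketch of the idea; it is a conceptual toy only and does not use (or mirror) the actual shortfin/HAL API:

```python
# Conceptual sketch of timeline-semaphore sequencing; this is NOT the
# shortfin/HAL API, just a toy model of the idea described above.
import threading


class TimelineSemaphore:
    """Monotonically increasing counter that waiters can block on."""

    def __init__(self) -> None:
        self._value = 0
        self._cond = threading.Condition()

    def signal(self, value: int) -> None:
        with self._cond:
            self._value = max(self._value, value)
            self._cond.notify_all()

    def wait(self, value: int) -> None:
        with self._cond:
            self._cond.wait_for(lambda: self._value >= value)


class WorkQueue:
    """Logical stream of execution: submissions execute in timeline order."""

    def __init__(self) -> None:
        self.timeline = TimelineSemaphore()
        self._next = 0

    def submit(self, fn) -> int:
        wait_for, signal_at = self._next, self._next + 1
        self._next = signal_at

        def run() -> None:
            self.timeline.wait(wait_for)   # sequence after prior submissions
            fn()
            self.timeline.signal(signal_at)

        threading.Thread(target=run).start()
        return signal_at


# Distinct queues (e.g. prefill vs. decode) advance independently, while
# submissions within one queue are ordered by its timeline semaphore.
prefill, decode = WorkQueue(), WorkQueue()
done = prefill.submit(lambda: print("prefill step"))
decode.submit(lambda: print("decode step"))
prefill.timeline.wait(done)
```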
10 changes: 5 additions & 5 deletions shortfin/shortfin/llm/config.py
@@ -7,10 +7,10 @@
"""Configuration objects.
Parameters that are intrinsic to a specific model.
In a typical transformer model, the KV cache is organized similar to (mapped to
our parameter names below):
k = tensor.empty(transformer_block_count, batch_size, seq,
attn_head_count, attn_head_dim)
v = ...
@@ -28,9 +28,9 @@
In this scenario, we declare that one block holds the KV cache for all transformer
block layers because it reduces the accounting. As such, for the above example,
a single position in the sequence will be 524,288 bytes, assuming a 2-byte element
type. If we choose to block by block_stride=16 positions, each block will be 8MiB.
Assuming we wanted to dedicate 12GiB to the block cache, this would equate to 1536
blocks for a total number of sequence positions of 24,576.
These are well-known numbers but are derived above to give a sense of scale.
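
The figures quoted in the docstring can be checked directly; a minimal sketch of the arithmetic, taking the 524,288 bytes-per-position value as given:

```python
# Worked version of the sizing example from the docstring above; the
# 524,288 bytes/position figure is taken as given (2-byte elements, K and V
# across all transformer blocks).
bytes_per_position = 524_288      # one sequence position, all layers, K+V
block_stride = 16                 # sequence positions per cache block
cache_budget = 12 * 1024**3       # 12 GiB dedicated to the block cache

block_bytes = bytes_per_position * block_stride
num_blocks = cache_budget // block_bytes
total_positions = num_blocks * block_stride

print(block_bytes // 2**20, "MiB per block")      # 8
print(num_blocks, "blocks")                       # 1536
print(total_positions, "sequence positions")      # 24576
```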
2 changes: 1 addition & 1 deletion version_info.json
@@ -1 +1 @@
{"package-version": "0.1.dev2"}
{"package-version": "0.1.dev3"}
