diff --git a/.github/workflows/build_packages.yml b/.github/workflows/build_packages.yml index 8f138d973..1200234c4 100644 --- a/.github/workflows/build_packages.yml +++ b/.github/workflows/build_packages.yml @@ -129,7 +129,7 @@ jobs: token: "${{ secrets.RELEASE_PUBLISH_ACCESS_TOKEN }}" tag: "dev-wheels" name: "dev-wheels" - body: "Automatic snapshot release of SHARK-Platform python wheels." + body: "Automatic snapshot release of shark-ai python wheels." removeArtifacts: false allowUpdates: true replacesArtifacts: true diff --git a/.github/workflows/ci-shark-platform.yml b/.github/workflows/ci-shark-ai.yml similarity index 99% rename from .github/workflows/ci-shark-platform.yml rename to .github/workflows/ci-shark-ai.yml index dc2f4646a..28e2bc883 100644 --- a/.github/workflows/ci-shark-platform.yml +++ b/.github/workflows/ci-shark-ai.yml @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -name: CI - shark-platform +name: CI - shark-ai on: workflow_dispatch: diff --git a/README.md b/README.md index 517980838..77f4a0d75 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# SHARK Modeling and Serving Libraries +# shark-ai: SHARK Modeling and Serving Libraries > [!IMPORTANT] > Development is still in progress for several project components. See the > notes below for which workflows are best supported. -![GitHub License](https://img.shields.io/github/license/nod-ai/SHARK-Platform) +![GitHub License](https://img.shields.io/github/license/nod-ai/shark-ai) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) @@ -15,7 +15,7 @@ -[![PyPI version](https://badge.fury.io/py/shortfin.svg)](https://badge.fury.io/py/shortfin) [![CI - shortfin](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_linux_x64-libshortfin.yml?query=event%3Apush) +[![PyPI version](https://badge.fury.io/py/shortfin.svg)](https://badge.fury.io/py/shortfin) [![CI - shortfin](https://github.com/nod-ai/shark-ai/actions/workflows/ci_linux_x64-libshortfin.yml/badge.svg?event=push)](https://github.com/nod-ai/shark-ai/actions/workflows/ci_linux_x64-libshortfin.yml?query=event%3Apush) The shortfin sub-project is SHARK's high performance inference library and serving engine. @@ -25,7 +25,7 @@ serving engine. ### [`sharktank/`](./sharktank/) -[![PyPI version](https://badge.fury.io/py/sharktank.svg)](https://badge.fury.io/py/sharktank) [![CI - sharktank](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-sharktank.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-sharktank.yml?query=event%3Apush) +[![PyPI version](https://badge.fury.io/py/sharktank.svg)](https://badge.fury.io/py/sharktank) [![CI - sharktank](https://github.com/nod-ai/shark-ai/actions/workflows/ci-sharktank.yml/badge.svg?event=push)](https://github.com/nod-ai/shark-ai/actions/workflows/ci-sharktank.yml?query=event%3Apush) The SHARK Tank sub-project contains a collection of model recipes and conversion tools to produce inference-optimized programs. @@ -45,7 +45,7 @@ conversion tools to produce inference-optimized programs. 
### [`tuner/`](./tuner/) -[![CI - Tuner](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-tuner.yml/badge.svg?event=push)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci-tuner.yml?query=event%3Apush) +[![CI - Tuner](https://github.com/nod-ai/shark-ai/actions/workflows/ci-tuner.yml/badge.svg?event=push)](https://github.com/nod-ai/shark-ai/actions/workflows/ci-tuner.yml?query=event%3Apush) The Tuner sub-project assists with tuning program performance by searching for optimal parameter configurations to use during model compilation. @@ -63,8 +63,8 @@ optimal parameter configurations to use during model compilation. Model name | Model recipes | Serving apps ---------- | ------------- | ------------ -SDXL | [`sharktank/sharktank/models/punet/`](https://github.com/nod-ai/SHARK-Platform/tree/main/sharktank/sharktank/models/punet) | [`shortfin/python/shortfin_apps/sd/`](https://github.com/nod-ai/SHARK-Platform/tree/main/shortfin/python/shortfin_apps/sd) -llama | [`sharktank/sharktank/models/llama/`](https://github.com/nod-ai/SHARK-Platform/tree/main/sharktank/sharktank/models/llama) | [`shortfin/python/shortfin_apps/llm/`](https://github.com/nod-ai/SHARK-Platform/tree/main/shortfin/python/shortfin_apps/llm) +SDXL | [`sharktank/sharktank/models/punet/`](https://github.com/nod-ai/shark-ai/tree/main/sharktank/sharktank/models/punet) | [`shortfin/python/shortfin_apps/sd/`](https://github.com/nod-ai/shark-ai/tree/main/shortfin/python/shortfin_apps/sd) +llama | [`sharktank/sharktank/models/llama/`](https://github.com/nod-ai/shark-ai/tree/main/sharktank/sharktank/models/llama) | [`shortfin/python/shortfin_apps/llm/`](https://github.com/nod-ai/shark-ai/tree/main/shortfin/python/shortfin_apps/llm) ## SHARK Users diff --git a/build_tools/python_deploy/pypi_deploy.sh b/build_tools/python_deploy/pypi_deploy.sh index c141aea4f..63f123ac0 100755 --- a/build_tools/python_deploy/pypi_deploy.sh +++ b/build_tools/python_deploy/pypi_deploy.sh @@ -21,7 +21,7 @@ # python3.13t -m ensurepip --upgrade # ``` # * Choose a release candidate to promote from -# https://github.com/nod-ai/SHARK-Platform/releases/tag/dev-wheels +# https://github.com/nod-ai/shark-ai/releases/tag/dev-wheels # # Usage: # ./pypi_deploy.sh 2.9.0rc20241108 @@ -33,7 +33,7 @@ RELEASE="$1" SCRIPT_DIR="$(dirname -- "$( readlink -f -- "$0"; )")"; REPO_ROOT="$(cd "$SCRIPT_DIR"/../../ && pwd)" TMPDIR="$(mktemp --directory --tmpdir shark_platform_pypi_wheels.XXXXX)" -ASSETS_PAGE="https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels" +ASSETS_PAGE="https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels" # TODO: rewrite in Python? diff --git a/docs/model_cookbook.md b/docs/model_cookbook.md index fdf4c7ede..ddc0cb3bb 100644 --- a/docs/model_cookbook.md +++ b/docs/model_cookbook.md @@ -1,6 +1,6 @@ # Model cookbook -Note: These are early notes and commands that the SHARK-Platform team is using +Note: These are early notes and commands that the shark-ai team is using and will turn into proper docs later. ## Diagrams diff --git a/docs/nightly_releases.md b/docs/nightly_releases.md index 819e22f61..545cdd4f5 100644 --- a/docs/nightly_releases.md +++ b/docs/nightly_releases.md @@ -2,19 +2,19 @@ > [!WARNING] > This is still under development! See -> https://github.com/nod-ai/SHARK-Platform/issues/400. +> https://github.com/nod-ai/shark-ai/issues/400. > > These instructions will be converted into a user guide once stable packages -> are published to PyPI: . +> are published to PyPI: . 
Nightly releases are uploaded to -https://github.com/nod-ai/SHARK-Platform/releases/tag/dev-wheels. +https://github.com/nod-ai/shark-ai/releases/tag/dev-wheels. * The "expanded_assets" version of a release page is compatible with the `-f, --find-links <url>` options of `pip install` ([docs here](https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-f)). For the "dev-wheels" release above, that page is: - <https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels> + <https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels> * These releases are generated using [`.github/workflows/build_packages.yml`](../.github/workflows/build_packages.yml) * That workflow runs the @@ -23,7 +23,7 @@ https://github.com/nod-ai/SHARK-Platform/releases/tag/dev-wheels. [`shortfin/build_tools/build_linux_package.sh`](../shortfin/build_tools/build_linux_package.sh) scripts * Workflow history can be viewed at - <https://github.com/nod-ai/SHARK-Platform/actions/workflows/build_packages.yml> + <https://github.com/nod-ai/shark-ai/actions/workflows/build_packages.yml> ## Prerequisites @@ -38,7 +38,7 @@ source builds. You will need a recent version of Python. * As of Nov 1, 2024, sharktank is compatible with Python 3.11. See - https://github.com/nod-ai/SHARK-Platform/issues/349 for Python 3.12 support. + https://github.com/nod-ai/shark-ai/issues/349 for Python 3.12 support. * As of Nov 4, 2024, shortfin publishes packages for Python 3.11, 3.12, 3.13, and 3.13t @@ -67,7 +67,7 @@ python3.11 -m venv 3.11.venv source 3.11.venv/bin/activate # Install 'sharktank' package from nightly releases. -pip install sharktank -f https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels +pip install sharktank -f https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels # Test the installation. python -c "from sharktank import ops; print('Sanity check passed')" @@ -84,7 +84,7 @@ python3.11 -m venv 3.11.venv source 3.11.venv/bin/activate # Install 'shortfin' package from nightly releases. -pip install shortfin -f https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels +pip install shortfin -f https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels # Test the installation. python -c "import shortfin as sf; print('Sanity check passed')" diff --git a/docs/quantization.md b/docs/quantization.md index fcc8961b0..25bfc9f8d 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -64,11 +64,11 @@ amount of Python code implementing direct math and packing schemes. PyTorch modules like `Linear` and `Conv2D`. 2. Types/Ops: The `nn.Module` implementations we provide are built in terms of SHARK Tank custom - [`InferenceTensor`](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/types/tensors.py#L153) - and [polymorphic functional ops library](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/signatures.py). + [`InferenceTensor`](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/types/tensors.py#L153) + and [polymorphic functional ops library](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/signatures.py). 3. Op specializations for optimized subsets of op type signatures and features (for example, [an optimized affine quantized linear specialization for - supported combinations of `TensorScaledLayout` arguments](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/qlinear_impls.py)). + supported combinations of `TensorScaledLayout` arguments](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/qlinear_impls.py)). (TODO: good place for a diagram) @@ -78,18 +78,18 @@ amount of Python code implementing direct math and packing schemes.
Available modules that support direct quantization (TODO: refactor to use torch "Module" terminology and naming schemes consistently): -* [`LinearLayer`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/layers/linear.py) -* [convolution layers](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/layers/conv.py) +* [`LinearLayer`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/layers/linear.py) +* [convolution layers](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/layers/conv.py) Note that most sharktank modules extend -[`ThetaLayer`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/layers/base.py#L63), +[`ThetaLayer`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/layers/base.py#L63), which calls for a bit of explanation. Traditional PyTorch Modules directly instantiate their backing parameters in their constructor. For dataset-heavy and polymorphic implementations like we commonly see in quantization and distribution, however, it can be beneficial to separate these concerns. The `ThetaLayer` simply takes a -[`Theta` object](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/theta.py#L74), +[`Theta` object](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/theta.py#L74), which is a tree-structured bag of native `torch.Tensor` or `InferenceTensor` instances, and it adopts the tensors in the bag as its own vs creating them. For those familiar with the concept, this is a form of dependency-injection @@ -114,7 +114,7 @@ tree to a specific Module instance. We've already met the `Theta` object above, which holds a tree of something called an -[`InferenceTensor`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L153). +[`InferenceTensor`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L153). Now we describe what this is. Note that presently, `InferenceTensor` is not a `torch.Tensor` but its own `ABC` type that: @@ -140,11 +140,11 @@ pipelines. There is a growing list of `InferenceTensor` sub-types, many of which are related to quantization: -* [`PrimitiveTensor`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L286): +* [`PrimitiveTensor`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L286): A simple composition of a single `torch.Tensor`. This is often used interchangeably with a `torch.Tensor` but is present for completeness of the type hierarchy and to be able to type select on. -* [`QuantizedTensor`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L372): +* [`QuantizedTensor`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L372): Abstract base class of all quantized tensors, providing two primary operations: * `unpack`: Accesses the backing `QuantizedLayout` of the tensor, which is @@ -154,12 +154,12 @@ related to quantization: layout, this explodes it into a canonical representation of individual tensors which can be algebraically implemented individually/generically). 
-* [`PlanarQuantizedTensor`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L408): +* [`PlanarQuantizedTensor`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L408): Concrete implementation for all non-packed quantized tensors that can be losslessly represented by a layout based on individual tensor components. All `QuantizedTensor` instances can be converted to a `PlanarQuantizedTensor`. -* [`QuantizerTensor`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L408): +* [`QuantizerTensor`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L408): (note the "r" in the name) An abstract `InferenceTensor` that exposes a `quantize(torch.Tensor | InferenceTensor) -> QuantizedTensor` operation used to transform an arbitrary tensor to a quantized form. There are a handful @@ -178,7 +178,7 @@ manipulate tensor contents via `QuantizedLayout`, but we haven't yet defined that. The *Tensor types are structural and exist to give identity, but the `QuantizedLayout` is where the "magic happens". -[`QuantizedLayout`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/tensors.py#L44) +[`QuantizedLayout`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/tensors.py#L44) is an `ABC`, supporting: * Serialization/interop with parameter archives. @@ -193,7 +193,7 @@ is an `ABC`, supporting: There are a number of implementations, as every quantization scheme typically needs at least one concrete `QuantizedLayout`. Simple schemes like affine quantization can be fully defined in terms of a single -[`TensorScaledLayout`](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/types/layouts.py#L43). +[`TensorScaledLayout`](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/types/layouts.py#L43). Whereas packed schemes like we find in inference engines like GGML and XNNPACK optimally require both a packed layout and a planar layout. @@ -224,7 +224,7 @@ interpreting/transforming using their natively defined forms. Previously, we found a rich type system defining all manner of layouts and quantization schemes, but what can be done with it? That is where the sharktank functional op library comes in. These -[logical ops](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/signatures.py) +[logical ops](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/signatures.py) provide the building blocks to implement built-in and custom `nn.Module` implementations operating on `InferenceTensor` (and torch.Tensor) types. @@ -239,12 +239,12 @@ implementation at any needed level of granularity: structures and preserve it when computing (when combined with a fusing compiler, this alone provides decent fallback implementations for a variety of "weight compression" oriented techniques). See - [some examples](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/custom_impls.py#L51). + [some examples](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/custom_impls.py#L51). * Pure-Torch decompositions for algebraic techniques like affine quantization (when combined with a fusing compiler, this alone is sufficient for optimization). 
See - [qlinear](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/qlinear_impls.py) and - [qconv](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/ops/qconv_impls.py) + [qlinear](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/qlinear_impls.py) and + [qconv](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/ops/qconv_impls.py) implementations of actual affine quantized decompositions. * Completely custom packed/optimized implementation. These can be written to activate on any level of detail of the type hierarchy. The implementation @@ -280,8 +280,8 @@ level. Some examples: [tensor trace/print](https://github.com/iree-org/iree-turbine/blob/main/iree.turbine/ops/iree.py#L52) * [Simple linalg based template expansion](https://github.com/iree-org/iree-turbine/blob/main/iree.turbine/ops/_jinja_test_ops.py#L28) (see backing example [jinja template](https://github.com/iree-org/iree-turbine/blob/main/iree.turbine/ops/templates/test_add_jinja.mlir)). -* Optimal linalg-based [8-bit block scaled mmt for weight compression](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/kernels/mmt_block_scaled_q8.py) - (see backing [jinja template](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/kernels/templates/mmt_block_scaled_q8_3d.mlir)). +* Optimal linalg-based [8-bit block scaled mmt for weight compression](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/kernels/mmt_block_scaled_q8.py) + (see backing [jinja template](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/kernels/templates/mmt_block_scaled_q8_3d.mlir)). * DSL based [like this fused attention kernel](https://github.com/iree-org/iree-turbine/blob/main/tests/kernel/fused_attention_test.py#L20) (note that in this case, the DSL exports to the underlying IR-based registration mechanism used in the previous examples). @@ -292,8 +292,8 @@ Since all of these types of custom kernels are just defined with simple Python tooling, they are really fast to iterate on. The linalg based kernels specifically tend to be highly portable, and we don't hesitate to write one of those when we need something specific that PyTorch doesn't provide out of the box -(i.e. [proper mixed-precision integer conv](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/kernels/conv_2d_nchw_fchw.py) -([template](https://github.com/nod-ai/SHARK-Platform/blob/main/sharktank/sharktank/kernels/templates/conv_2d_nchw_fchw.mlir))). +(i.e. [proper mixed-precision integer conv](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/kernels/conv_2d_nchw_fchw.py) +([template](https://github.com/nod-ai/shark-ai/blob/main/sharktank/sharktank/kernels/templates/conv_2d_nchw_fchw.mlir))). ## Dataset transformation @@ -307,7 +307,7 @@ We take a practical approach to this, writing implementation specific converters where needed, and taking advantage of industry-standard consolidation points where available (like GGUF) in order to cover a wider surface area. -Behind both is the notion of a [`Dataset`](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/types/theta.py#L263), +Behind both is the notion of a [`Dataset`](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/types/theta.py#L263), which combines some set of hyper-parameters with a root `Theta` object (typically representing the layer-tree of frozen tensors).
Datasets can be losslessly persisted to IREE IRPA files, which can then be loaded by either @@ -321,9 +321,9 @@ transform, shard, etc. See some examples: -* [models/punet/tools/import_hf_dataset.py](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/models/punet/tools/import_hf_dataset.py) : +* [models/punet/tools/import_hf_dataset.py](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/models/punet/tools/import_hf_dataset.py) : Creating a `Dataset` object from an HF diffusers safetensors file and config.json. -* [models/punet/tools/import_brevitas_dataset.py](https://github.com/nod-ai/SHARK-Platform/blob/quant_docs/sharktank/sharktank/models/punet/tools/import_brevitas_dataset.py) : +* [models/punet/tools/import_brevitas_dataset.py](https://github.com/nod-ai/shark-ai/blob/quant_docs/sharktank/sharktank/models/punet/tools/import_brevitas_dataset.py) : Creates a quantized `Dataset` by combining: * HF diffusers `config.json` diff --git a/docs/shortfin/llm/developer/e2e_llama8b_mi300x.md b/docs/shortfin/llm/developer/e2e_llama8b_mi300x.md index e3150ed5c..1ce2d1e8d 100644 --- a/docs/shortfin/llm/developer/e2e_llama8b_mi300x.md +++ b/docs/shortfin/llm/developer/e2e_llama8b_mi300x.md @@ -16,7 +16,7 @@ process of exporting a model for use in the shortfin llm server with an MI300 GP ### Setting Up Environment Follow the `Development Getting Started` docs -[here](https://github.com/nod-ai/SHARK-Platform/blob/main/README.md#development-getting-started) +[here](https://github.com/nod-ai/shark-ai/blob/main/README.md#development-getting-started) to set up your environment for development. We will use an example with `llama_8b_f16_decomposed` in order to describe the diff --git a/docs/shortfin/llm/user/e2e_llama8b_mi300x.md b/docs/shortfin/llm/user/e2e_llama8b_mi300x.md index 985e55c13..5e0749546 100644 --- a/docs/shortfin/llm/user/e2e_llama8b_mi300x.md +++ b/docs/shortfin/llm/user/e2e_llama8b_mi300x.md @@ -36,8 +36,8 @@ pip install shark-ai #### Nightly ```bash -pip install sharktank -f https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels -pip install shortfin -f https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels +pip install sharktank -f https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels +pip install shortfin -f https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels ``` #### Install dataclasses-json diff --git a/docs/user_guide.md b/docs/user_guide.md index b7a530583..c3da1f4f5 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -50,7 +50,7 @@ pip install shark-ai[apps] Temporarily, you may need an update to your `shortfin` install. Install the latest pre-release with: ``` -pip install shortfin --upgrade --pre -f https://github.com/nod-ai/SHARK-Platform/releases/expanded_assets/dev-wheels +pip install shortfin --upgrade --pre -f https://github.com/nod-ai/shark-ai/releases/expanded_assets/dev-wheels ``` ### Test the installation.
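The `docs/quantization.md` hunks above walk through the `Theta`/`ThetaLayer` dependency-injection pattern: a module adopts its parameters from an externally materialized bag of tensors rather than constructing them itself. A minimal sketch of the idea follows. This is illustrative only, not the real sharktank API: the actual `Theta` is tree-structured and can hold `InferenceTensor` instances, whereas the toy version here is a flat dict of plain `torch.Tensor`s, and `InjectedLinear` is a hypothetical stand-in for `LinearLayer`.

```python
# Illustrative sketch of the Theta/ThetaLayer pattern described in
# docs/quantization.md. Hypothetical names; not the sharktank classes.
import torch


class Theta:
    """A bag of named parameters, materialized outside any module."""

    def __init__(self, tensors: dict[str, torch.Tensor]):
        self._tensors = tensors

    def tensor(self, name: str) -> torch.Tensor:
        return self._tensors[name]


class InjectedLinear(torch.nn.Module):
    """Adopts its weight from an injected Theta instead of creating it."""

    def __init__(self, theta: Theta):
        super().__init__()
        self.weight = theta.tensor("weight")  # adopted, not instantiated

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x @ self.weight.T


# The same module code works whether the bag was loaded from a float
# checkpoint or produced by an offline quantization/sharding pipeline.
theta = Theta({"weight": torch.randn(8, 4)})
layer = InjectedLinear(theta)
print(layer(torch.randn(2, 4)).shape)  # torch.Size([2, 8])
```

As the doc notes, the payoff of this separation is that quantized, sharded, or otherwise transformed datasets can be swapped in without touching module code.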
diff --git a/shark-ai/pyproject.toml b/shark-ai/pyproject.toml index f78a1641f..133026d13 100644 --- a/shark-ai/pyproject.toml +++ b/shark-ai/pyproject.toml @@ -24,7 +24,7 @@ requires-python = ">= 3.10" dynamic = ["version", "dependencies"] [project.urls] -Repository = "https://github.com/nod-ai/SHARK-Platform" +Repository = "https://github.com/nod-ai/shark-ai" [project.optional-dependencies] onnx = [ diff --git a/sharktank/README.md b/sharktank/README.md index c36cdd055..7770595ed 100644 --- a/sharktank/README.md +++ b/sharktank/README.md @@ -12,7 +12,7 @@ tooling. ## Project Status -[![CI - Perplexity](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_eval.yaml/badge.svg?branch=main&event=schedule)](https://github.com/nod-ai/SHARK-Platform/actions/workflows/ci_eval.yaml) +[![CI - Perplexity](https://github.com/nod-ai/shark-ai/actions/workflows/ci_eval.yaml/badge.svg?branch=main&event=schedule)](https://github.com/nod-ai/shark-ai/actions/workflows/ci_eval.yaml) ## Examples diff --git a/sharktank/pyproject.toml b/sharktank/pyproject.toml index 65f264d16..e5f9972a3 100644 --- a/sharktank/pyproject.toml +++ b/sharktank/pyproject.toml @@ -24,7 +24,7 @@ requires-python = ">= 3.11" dynamic = ["version", "dependencies", "optional-dependencies"] [project.urls] -Repository = "https://github.com/nod-ai/SHARK-Platform" +Repository = "https://github.com/nod-ai/shark-ai" [tool.setuptools.packages.find] where = ["."] diff --git a/sharktank/sharktank/ops/custom_impls.py b/sharktank/sharktank/ops/custom_impls.py index 8f6654a8e..9acc7c562 100644 --- a/sharktank/sharktank/ops/custom_impls.py +++ b/sharktank/sharktank/ops/custom_impls.py @@ -33,7 +33,7 @@ # Fused FP matmul. -# Disabled: See https://github.com/nod-ai/SHARK-Platform/issues/44 +# Disabled: See https://github.com/nod-ai/shark-ai/issues/44 # @matmul.override(Tensor, Tensor) # def matmul_mmtfp_tensor_tensor(lhs, rhs, *, transpose_rhs: bool): # lhs = unbox_tensor(lhs) diff --git a/sharktank/sharktank/utils/export_artifacts.py b/sharktank/sharktank/utils/export_artifacts.py index 057d3b664..9deade56c 100644 --- a/sharktank/sharktank/utils/export_artifacts.py +++ b/sharktank/sharktank/utils/export_artifacts.py @@ -25,7 +25,7 @@ class ExportMlirException(Exception): - """SHARK-Platform export MLIR exception that preserves the command line and error output.""" + """shark-ai export MLIR exception that preserves the command line and error output.""" def __init__(self, process: subprocess.CompletedProcess, cwd: str): try: diff --git a/sharktank/tests/ops/ops_test.py b/sharktank/tests/ops/ops_test.py index 8b37525e5..ad6759ce6 100644 --- a/sharktank/tests/ops/ops_test.py +++ b/sharktank/tests/ops/ops_test.py @@ -136,7 +136,7 @@ def testMatchFail(self): ): ops.matmul(1, 2) - @unittest.skip("https://github.com/nod-ai/SHARK-Platform/issues/44") + @unittest.skip("https://github.com/nod-ai/shark-ai/issues/44") def testTorchImplTransposedRHS(self): ops._registry._test_enable_last_op_dispatch(True) t1 = torch.rand(32, 16, dtype=torch.float32) @@ -149,7 +149,7 @@ def testTorchImplTransposedRHS(self): ops.custom_impls.matmul_mmtfp_tensor_tensor, ) - @unittest.skip("https://github.com/nod-ai/SHARK-Platform/issues/44") + @unittest.skip("https://github.com/nod-ai/shark-ai/issues/44") def testTorchImplNonTransposedRHS(self): ops._registry._test_enable_last_op_dispatch(True) t1 = torch.rand(32, 16, dtype=torch.float32) @@ -162,7 +162,7 @@ def testTorchImplNonTransposedRHS(self): ops.custom_impls.matmul_mmtfp_tensor_tensor, ) - 
@unittest.skip("https://github.com/nod-ai/SHARK-Platform/issues/44") + @unittest.skip("https://github.com/nod-ai/shark-ai/issues/44") def testTorchImplTransposedPrimitiveRHS(self): ops._registry._test_enable_last_op_dispatch(True) t1 = torch.rand(32, 16, dtype=torch.float32) diff --git a/shortfin/README.md b/shortfin/README.md index 13ee20966..6269ca702 100644 --- a/shortfin/README.md +++ b/shortfin/README.md @@ -7,7 +7,7 @@ and serving engine. Shortfin consists of these major components: [IREE](https://github.com/iree-org/iree) * Python bindings for the underlying inference library * Example applications in - ['shortfin_apps'](https://github.com/nod-ai/SHARK-Platform/tree/main/shortfin/python/shortfin_apps) + ['shortfin_apps'](https://github.com/nod-ai/shark-ai/tree/main/shortfin/python/shortfin_apps) built using the python bindings ## Prerequisites diff --git a/shortfin/pyproject.toml b/shortfin/pyproject.toml index 7c4ed8a33..1abb49ef6 100644 --- a/shortfin/pyproject.toml +++ b/shortfin/pyproject.toml @@ -31,7 +31,7 @@ requires-python = ">= 3.10" dynamic = ["version"] [project.urls] -Repository = "https://github.com/nod-ai/SHARK-Platform" +Repository = "https://github.com/nod-ai/shark-ai" Documentation = "https://shortfin.readthedocs.io/en/latest/" [project.optional-dependencies] diff --git a/tuner/README.md b/tuner/README.md index e6a515729..3737f6bdf 100644 --- a/tuner/README.md +++ b/tuner/README.md @@ -33,5 +33,5 @@ documentation](https://iree.dev/building-from-source/getting-started/#python-bin ## Examples Check the `examples` directory for sample tuners implemented with `libtuner`. -The [`dispatch` example](https://github.com/nod-ai/SHARK-Platform/tree/main/tuner/examples/dispatch) +The [`dispatch` example](https://github.com/nod-ai/shark-ai/tree/main/tuner/examples/dispatch) should be a good starting point for most users. diff --git a/tuner/pyproject.toml b/tuner/pyproject.toml index 1661a7744..c36326bf7 100644 --- a/tuner/pyproject.toml +++ b/tuner/pyproject.toml @@ -21,4 +21,4 @@ requires-python = ">= 3.10" dynamic = ["version"] [project.urls] -Repository = "https://github.com/nod-ai/SHARK-Platform" +Repository = "https://github.com/nod-ai/shark-ai" diff --git a/tuner/tuner/candidate_gen.py b/tuner/tuner/candidate_gen.py index 2f21520f0..b50df12d5 100644 --- a/tuner/tuner/candidate_gen.py +++ b/tuner/tuner/candidate_gen.py @@ -74,7 +74,7 @@ def apply_configuration( class DispatchTuner(DispatchParser): - # TODO(https://github.com/nod-ai/SHARK-Platform/issues/453): Remove this in favor of configuring using transform dialect. + # TODO(https://github.com/nod-ai/shark-ai/issues/453): Remove this in favor of configuring using transform dialect. @abstractmethod def apply_params( self,