From b6bc3dff21a3ed901b7fdbc67bb69427c9f56263 Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Fri, 14 Jul 2023 09:17:00 +0530 Subject: [PATCH 1/7] :wolf: fix python tokenizer path --- src/transformer_deploy/utils/python_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformer_deploy/utils/python_tokenizer.py b/src/transformer_deploy/utils/python_tokenizer.py index 543ecbc..5a33665 100644 --- a/src/transformer_deploy/utils/python_tokenizer.py +++ b/src/transformer_deploy/utils/python_tokenizer.py @@ -43,7 +43,7 @@ def initialize(self, args: Dict[str, str]) -> None: """ # more variables in https://github.com/triton-inference-server/python_backend/blob/main/src/python.cc - path: str = str(Path(args["model_repository"]).parent.absolute()) + path: str = str(Path(args["model_repository"]).absolute() / args["model_version"]) self.tokenizer = AutoTokenizer.from_pretrained(path) model_config = AutoConfig.from_pretrained(path) self.model_input_names = self.tokenizer.model_input_names From ee6845910b07ddd0e69a7f1da3ea6e5421ffd85f Mon Sep 17 00:00:00 2001 From: Pranjalya Date: Mon, 24 Jul 2023 00:59:45 +0530 Subject: [PATCH 2/7] :cat: new dockerfile --- Dockerfile | 8 ++++---- requirements_gpu.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index f618a2f..0ca2e12 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/tritonserver:22.07-py3 +FROM nvcr.io/nvidia/tritonserver:23.06-py3 # see .dockerignore to check what is transfered @@ -8,10 +8,10 @@ RUN apt-get update && \ python3-distutils \ python3-venv \ python3-pip \ - apt-get clean + && apt-get clean -ARG UID=1000 -ARG GID=1000 +ARG UID=10000 +ARG GID=10000 RUN addgroup --gid $GID ubuntu && \ useradd -d /home/ubuntu -ms /bin/bash -g ubuntu -G sudo -u $UID ubuntu ## Switch to ubuntu user by default. diff --git a/requirements_gpu.txt b/requirements_gpu.txt index 8060dfb..dbb7685 100644 --- a/requirements_gpu.txt +++ b/requirements_gpu.txt @@ -1,5 +1,5 @@ onnxruntime-gpu==1.13.1 -nvidia-tensorrt==8.4.1.5 +nvidia-tensorrt onnx_graphsurgeon polygraphy cupy-cuda117 From f0c4a63c07c73cbd4ae89ae41b944e749ea7cec9 Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Tue, 25 Jul 2023 09:10:01 +0530 Subject: [PATCH 3/7] Update Dockerfile --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ca2e12..4ae0138 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,8 @@ RUN apt-get update && \ python3-pip \ && apt-get clean -ARG UID=10000 -ARG GID=10000 +ARG UID=1000 +ARG GID=1000 RUN addgroup --gid $GID ubuntu && \ useradd -d /home/ubuntu -ms /bin/bash -g ubuntu -G sudo -u $UID ubuntu ## Switch to ubuntu user by default. From 2ea79c9e58d9c5072d7d5c4a0ffc0b16d664549b Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Mon, 31 Jul 2023 00:03:34 +0530 Subject: [PATCH 4/7] updated tensorrt --- requirements_gpu.txt | 2 +- src/transformer_deploy/convert.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_gpu.txt b/requirements_gpu.txt index dbb7685..dd276cd 100644 --- a/requirements_gpu.txt +++ b/requirements_gpu.txt @@ -1,5 +1,5 @@ onnxruntime-gpu==1.13.1 -nvidia-tensorrt onnx_graphsurgeon polygraphy cupy-cuda117 +tensorrt \ No newline at end of file diff --git a/src/transformer_deploy/convert.py b/src/transformer_deploy/convert.py index e22c2b2..dd52e9b 100644 --- a/src/transformer_deploy/convert.py +++ b/src/transformer_deploy/convert.py @@ -295,7 +295,7 @@ def get_pytorch_infer(model: PreTrainedModel, cuda: bool, task: str): logging.info("preparing TensorRT (FP16) benchmark") try: import tensorrt as trt - from tensorrt.tensorrt import ICudaEngine, Logger, Runtime + from tensorrt import ICudaEngine, Logger, Runtime from transformer_deploy.backends.trt_utils import build_engine, load_engine, save_engine except ImportError: From 4a8f3571dfb74da7d46d91165d6e99fb81f7f8d2 Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Mon, 31 Jul 2023 00:15:47 +0530 Subject: [PATCH 5/7] update files --- src/transformer_deploy/backends/trt_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformer_deploy/backends/trt_utils.py b/src/transformer_deploy/backends/trt_utils.py index a9df232..ab87e10 100644 --- a/src/transformer_deploy/backends/trt_utils.py +++ b/src/transformer_deploy/backends/trt_utils.py @@ -24,7 +24,7 @@ import tensorrt as trt import torch from tensorrt import ICudaEngine, IExecutionContext, ILayer, INetworkDefinition, Logger, Runtime -from tensorrt.tensorrt import Builder, IBuilderConfig, IElementWiseLayer, IOptimizationProfile, IReduceLayer, OnnxParser +from tensorrt import Builder, IBuilderConfig, IElementWiseLayer, IOptimizationProfile, IReduceLayer, OnnxParser @dataclass From f6111d390bd519d72dbd3a5e530ad0102f40d3db Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Mon, 31 Jul 2023 23:47:39 +0530 Subject: [PATCH 6/7] :wolf: update tensorrt version --- demo/generative-model/gpt2.ipynb | 2 +- demo/quantization/quantization_end_to_end.ipynb | 2 +- demo/torchdynamo/benchmark.ipynb | 2 +- demo/torchdynamo/dynamo_utils.py | 2 +- docs/python.md | 4 ++-- src/transformer_deploy/t5_utils/conversion_utils.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/demo/generative-model/gpt2.ipynb b/demo/generative-model/gpt2.ipynb index c3c0c9d..5784e75 100644 --- a/demo/generative-model/gpt2.ipynb +++ b/demo/generative-model/gpt2.ipynb @@ -146,7 +146,7 @@ "import tensorrt as trt\n", "import torch\n", "from tensorrt import ICudaEngine\n", - "from tensorrt.tensorrt import Logger, Runtime\n", + "from tensorrt import Logger, Runtime\n", "from transformers import AutoTokenizer, BatchEncoding, GPT2LMHeadModel, AutoModelForCausalLM\n", "from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions\n", "from transformer_deploy.utils.generative_model import GPTModelWrapper\n", diff --git a/demo/quantization/quantization_end_to_end.ipynb b/demo/quantization/quantization_end_to_end.ipynb index 4911ef0..75b97ef 100644 --- a/demo/quantization/quantization_end_to_end.ipynb +++ b/demo/quantization/quantization_end_to_end.ipynb @@ -228,7 +228,7 @@ "import torch\n", "import transformers\n", "from datasets import load_dataset, load_metric\n", - "from tensorrt.tensorrt import IExecutionContext, Logger, Runtime\n", + "from tensorrt import IExecutionContext, Logger, Runtime\n", "\n", "from transformers import (\n", " AutoModelForSequenceClassification,\n", diff --git a/demo/torchdynamo/benchmark.ipynb b/demo/torchdynamo/benchmark.ipynb index b06e44c..0e41ee9 100644 --- a/demo/torchdynamo/benchmark.ipynb +++ b/demo/torchdynamo/benchmark.ipynb @@ -183,7 +183,7 @@ "\n", "import gc\n", "import tensorrt as trt\n", - "from tensorrt.tensorrt import ICudaEngine, Logger, Runtime\n", + "from tensorrt import ICudaEngine, Logger, Runtime\n", "import onnx\n", "from transformer_deploy.backends.trt_utils import build_engine, save_engine" ] diff --git a/demo/torchdynamo/dynamo_utils.py b/demo/torchdynamo/dynamo_utils.py index 0e52ee3..0b7edf2 100644 --- a/demo/torchdynamo/dynamo_utils.py +++ b/demo/torchdynamo/dynamo_utils.py @@ -25,7 +25,7 @@ from matplotlib.axes import Axes from matplotlib.figure import Figure from onnxruntime import GraphOptimizationLevel -from tensorrt.tensorrt import Runtime +from tensorrt import Runtime from torch._C._autograd import ProfilerActivity from torchdynamo.eval_frame import OptimizeContext from transformers import PreTrainedModel diff --git a/docs/python.md b/docs/python.md index d4b3d0c..c10bfb4 100644 --- a/docs/python.md +++ b/docs/python.md @@ -21,7 +21,7 @@ Now we need to convert to TensorRT: ```python import tensorrt as trt -from tensorrt.tensorrt import Logger, Runtime +from tensorrt import Logger, Runtime from transformer_deploy.backends.trt_utils import build_engine @@ -50,7 +50,7 @@ Now the engine is ready, we can prepare the inference: ```python import torch -from tensorrt.tensorrt import IExecutionContext +from tensorrt import IExecutionContext from transformer_deploy.backends.trt_utils import get_binding_idxs diff --git a/src/transformer_deploy/t5_utils/conversion_utils.py b/src/transformer_deploy/t5_utils/conversion_utils.py index a26fb33..f04b4e8 100644 --- a/src/transformer_deploy/t5_utils/conversion_utils.py +++ b/src/transformer_deploy/t5_utils/conversion_utils.py @@ -630,7 +630,7 @@ def onnx_to_tensorrt_model( runtime, onnx_model_path, trt_logger, workspace_size, quantization, tensorrt_model_path, **kwargs ) -> Callable[[Dict[str, torch.Tensor]], Dict[str, torch.Tensor]]: try: - from tensorrt.tensorrt import ICudaEngine + from tensorrt import ICudaEngine from transformer_deploy.backends.trt_utils import build_engine, load_engine, save_engine From 285d92465f45d18df1a3f3df48043e0b4c93b398 Mon Sep 17 00:00:00 2001 From: Pranjalya Tiwari Date: Mon, 31 Jul 2023 23:55:16 +0530 Subject: [PATCH 7/7] :cat2: fix tensorrt version --- requirements_gpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_gpu.txt b/requirements_gpu.txt index dd276cd..55d2850 100644 --- a/requirements_gpu.txt +++ b/requirements_gpu.txt @@ -2,4 +2,4 @@ onnxruntime-gpu==1.13.1 onnx_graphsurgeon polygraphy cupy-cuda117 -tensorrt \ No newline at end of file +tensorrt==8.6.1 \ No newline at end of file