diff --git a/Dockerfile b/Dockerfile
index 16f3e69d1b..4174b34ddd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -134,8 +134,8 @@ FROM ${TENSORFLOW_IMAGE} AS trtserver_tf
 ############################################################################
 FROM ${BASE_IMAGE} AS trtserver_build

-ARG TRTIS_VERSION=1.13.0dev
-ARG TRTIS_CONTAINER_VERSION=20.05dev
+ARG TRTIS_VERSION=1.13.0
+ARG TRTIS_CONTAINER_VERSION=20.03.1

 # libgoogle-glog0v5 is needed by caffe2 libraries.
 # libcurl4-openSSL-dev is needed for GCS
@@ -319,8 +319,8 @@ ENTRYPOINT ["/opt/tritonserver/nvidia_entrypoint.sh"]
 ############################################################################
 FROM ${BASE_IMAGE}

-ARG TRTIS_VERSION=1.13.0dev
-ARG TRTIS_CONTAINER_VERSION=20.05dev
+ARG TRTIS_VERSION=1.13.0
+ARG TRTIS_CONTAINER_VERSION=20.03.1

 ENV TENSORRT_SERVER_VERSION ${TRTIS_VERSION}
 ENV NVIDIA_TENSORRT_SERVER_VERSION ${TRTIS_CONTAINER_VERSION}
diff --git a/README.rst b/README.rst
index e65f7cff38..11fa36cb44 100644
--- a/README.rst
+++ b/README.rst
@@ -35,12 +35,6 @@ NVIDIA Triton Inference Server
   the inference server in** `Roadmap
   `_.

-  **LATEST RELEASE: You are currently on the master branch which
-  tracks under-development progress towards the next release. The
-  latest release of the Triton Inference Server is 1.12.0 and
-  is available on branch** `r20.03
-  `_.
-
 .. overview-begin-marker-do-not-remove

 NVIDIA Triton Inference Server provides a cloud inferencing solution
@@ -49,22 +43,40 @@
 via an HTTP or GRPC endpoint, allowing remote clients to request
 inferencing for any model being managed by the server. For edge
 deployments, Triton Server is also available as a shared library with
 an API that allows the full functionality of the server to be included
-directly in an application. Triton Server provides the following
-features:
+directly in an application.
+
+What's New In 1.13.0
+--------------------
+
+* Updates for KFServing HTTP/REST and GRPC protocols and corresponding Python
+  and C++ client libraries. See the Roadmap section for more information.
+
+* Update GRPC version to 1.24.0.
+
+* Several issues with S3 storage were resolved.
+
+* Fix last_inference_timestamp value to correctly show the time when inference
+  last occurred for each model.
+
+* The Caffe2 backend is deprecated. Support for Caffe2 models will be removed in
+  a future release.
+
+Features
+--------

 * `Multiple framework support
-  `_. The
+  `_. The
   server can manage any number and mix of models (limited by system
   disk and memory resources). Supports TensorRT, TensorFlow GraphDef,
   TensorFlow SavedModel, ONNX, PyTorch, and Caffe2 NetDef model
   formats. Also supports TensorFlow-TensorRT and ONNX-TensorRT
   integrated models. Variable-size input and output tensors are
   allowed if supported by the framework. See `Capabilities
-  `_
+  `_
   for detailed support information for each framework.

 * `Concurrent model execution support
-  `_. Multiple
+  `_. Multiple
   models (or multiple instances of the same model) can run
   simultaneously on the same GPU.
@@ -72,13 +84,13 @@ features:
   can accept requests for a batch of inputs and respond with the
   corresponding batch of outputs. Triton Server also supports multiple
   `scheduling and batching
-  `_
+  `_
   algorithms that combine individual inference requests together to
   improve inference throughput. These scheduling and batching
   decisions are transparent to the client requesting inference.

 * `Custom backend support
-  `_. Triton
+  `_. Triton
   Server allows individual models to be implemented with custom
   backends instead of by a deep-learning framework. With a custom
   backend a model can implement any logic desired, while still
@@ -86,7 +98,7 @@ features:
   batching and other features provided by the server.

 * `Ensemble support
-  `_. An
+  `_. An
   ensemble represents a pipeline of one or more models and the
   connection of input and output tensors between those models. A
   single inference request to an ensemble will trigger the execution
@@ -96,37 +108,31 @@ features:
   all system GPUs.

 * Triton Server provides `multiple modes for model management
-  `_. These
+  `_. These
   model management modes allow for both implicit and explicit loading
   and unloading of models without requiring a server restart.

 * `Model repositories
-  `_
+  `_
   may reside on a locally accessible file system (e.g. NFS), in Google
   Cloud Storage or in Amazon S3.

 * Readiness and liveness `health endpoints
-  `_
+  `_
   suitable for any orchestration or deployment framework, such as
   Kubernetes.

 * `Metrics
-  `_
+  `_
   indicating GPU utilization, server throughput, and server latency.

 * `C library inferface
-  `_
+  `_
   allows the full functionality of Triton Server to be included
   directly in an application.

 .. overview-end-marker-do-not-remove

-The current release of the Triton Inference Server is 1.12.0 and
-corresponds to the 20.02 release of the tensorrtserver container on
-`NVIDIA GPU Cloud (NGC) `_. The branch for
-this release is `r20.03
-`_.
-
 Backwards Compatibility
 -----------------------
@@ -182,7 +188,7 @@ already understood. The primary reasons for the name change are to :
   frameworks and formats.

 * Highlight that the server is aligning HTTP/REST and GRPC protocols
-  with a set of `KFServing community standard inference protocols
+  with a set of `KFServing standard inference protocols
   `_ that have been proposed by the `KFServing project
   `_.

@@ -190,28 +196,20 @@ already understood. The primary reasons for the name change are to :
 Transitioning from the current protocols (version 1) to the new
 protocols (version 2) will take place over several releases.

-* **Current master**
+* 20.03.1

-  * Alpha release of server support for KFServing community standard
-    GRPC and HTTP/REST inference protocol.
-  * Alpha release of Python client library that uses KFServing
-    community standard GRPC and HTTP/REST inference protocol.
-  * See `client documentation
-    `_
-    for description and examples showing how to enable and use the new
-    GRPC and HTTP/REST inference protocol and Python client library.
-  * Existing HTTP/REST and GRPC protocols, and existing client APIs
-    continue to be supported and remain the default protocols.
-
-* 20.05
-
-  * Beta release of KFServing community standard HTTP/REST and GRPC
-    inference protocol support in server, Python client, and C++
-    client.
+  * The Triton updates originally planned for 20.05 are now included
+    in the 20.03.1 release (Triton version 1.13.0).
+  * Beta release of KFServing HTTP/REST and GRPC inference protocol
+    support in server, Python client, and C++ client.
   * Beta release of the `HTTP/REST and GRPC extensions
     `_
     to the KFServing inference protocol.
-  * Existing HTTP/REST and GRPC protocols are deprecated but remain
+  * See `client documentation
+    `_
+    for a description and examples showing how to enable and use the new
+    client libraries.
+  * Existing V1 HTTP/REST and GRPC protocols are deprecated but remain
     the default.
   * Existing shared library inferface defined in trtserver.h continues
     to be supported but is deprecated.
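The 20.03.1 bullets above reference the beta V2 client libraries. As an illustration only (it is not part of this patch), a minimal liveness check against the new KFServing HTTP/REST endpoint could look like the sketch below, which assumes the beta Python package provides a module named tritonhttpclient and that the server was started with the --api-version=2 flag described later in this diff::

  # Hypothetical usage sketch; the module name "tritonhttpclient" and the
  # endpoint details are assumptions, not something this patch establishes.
  import tritonhttpclient

  # Connect to the server's HTTP/REST endpoint (default port 8000).
  client = tritonhttpclient.InferenceServerClient(url="localhost:8000")

  # Liveness and readiness checks over the new KFServing protocol.
  print("server live: ", client.is_server_live())
  print("server ready:", client.is_server_ready())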
@@ -220,36 +218,30 @@ protocols (version 2) will take place over several releases.

 * 20.06

-  * Triton Server version 2.0.0.
-  * KFserving community standard HTTP/REST and GRPC inference
-    protocols plus all Triton `extensions
-    `_
-    become the default and only supported protocols for the server.
-  * C++ and Python client libraries based on the KFServing standard
-    inference protocols become the default and only supported client
-    libraries.
-  * The new shared library interface defined in tritonserver.h becomes
-    the default and only supported shared library interface.
-  * Original C++ and Python client libraries are removed. Release
-    20.05 is the last release to support these libraries.
-  * Original shared library interface defined in trtserver.h is
-    removed. Release 20.05 is the last release to support the
-    trtserver.h shared library interface.
+  * Triton Server will release two containers, one for version 1.14.0
+    and one for version 2.0.0.
+  * The Triton 2.0.0 version will contain only the KFServing HTTP/REST
+    and GRPC inference protocols and the corresponding V2 Python and
+    C++ client libraries and examples.
+  * The Triton 2.0.0 version will support the shared library interface
+    defined in tritonserver.h.
+  * The 1.14.0 release will likely be the last release for Triton V1.
+  * The Triton 1.14.0 version will contain only the V1 HTTP/REST
+    and GRPC inference protocols and the corresponding V1 Python and
+    C++ client libraries and examples.
+  * The Triton 1.14.0 version will support the shared library interface
+    defined in tensorrtserver.h.

 Throughout the transition the model repository struture and custom
 backend APIs will remain unchanged so that any existing model
 repository and custom backends will continue to work with Triton
 Server.

-In the 20.06 release there will be some minor changes to the
+In the Triton 2.0.0 release there will be some minor changes to the
 tritonserver command-line executable arguments. It will be necessary
-to revisit and possible adjust invocations of tritonserver executable.
-
-In the 20.06 release there will be some minor changes to the model
-configuration schema. It is expected that these changes will not
-impact the vast majority of model configurations. For impacted models
-the model configuration will need minor edits to become compatible
-with Triton Server version 2.0.0.
+to revisit and possibly adjust invocations of the tritonserver
+executable. The Triton 1.14.0 command line will remain unchanged
+from earlier version 1 releases.

 Documentation
 -------------
@@ -266,7 +258,7 @@ and for `earlier releases
 `_.

 An `FAQ
-`_
+`_
 provides answers for frequently asked questions. READMEs for
 deployment examples can be found in subdirectories of
diff --git a/VERSION b/VERSION
index 156c282d62..feaae22bac 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.13.0dev
+1.13.0
diff --git a/docs/build.rst b/docs/build.rst
index 9d17a94454..6b2a7aa0e6 100644
--- a/docs/build.rst
+++ b/docs/build.rst
@@ -368,7 +368,7 @@ Building A Custom Backend

 The source repository contains several example custom backends in the
 `src/custom directory
-`_.
+`_.
 These custom backends are built using CMake::

   $ mkdir builddir
@@ -426,11 +426,11 @@ Using the Custom Instance Wrapper Class
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 The custom backend SDK provides a `CustomInstance Class
-`_.
+`_.
 The CustomInstance class is a C++ wrapper class that abstracts away
 the backend C-API for ease of use.
 All of the example custom backends in `src/custom directory
-`_
+`_
 derive from the CustomInstance class and can be referenced for usage.

 Building the Client Libraries and Examples
diff --git a/docs/client.rst b/docs/client.rst
index 9a448e5198..5e3b80c652 100644
--- a/docs/client.rst
+++ b/docs/client.rst
@@ -273,20 +273,20 @@ Client API

 The C++ client API exposes a class-based interface for querying server
 and model status and for performing inference. The commented interface
-is available at `src/clients/c++/library/request.h.in
-`_
+is available at `src/clients/c++/api_v1/library/request.h.in
+`_
 and in the API Reference.

 The Python client API provides similar capabilities as the C++
 API. The commented interface is available at
 `src/clients/python/api_v1/library/\_\_init\_\_.py
-`_
+`_
 and in the API Reference.

 A simple C++ example application at
 `src/clients/c++/api_v1/examples/simple\_client.cc.in
-`_
+`_
 and a Python version at
 `src/clients/python/api_v1/examples/simple\_client.py
-`_
+`_
 demonstrate basic client API usage.

 To run the C++ version of the simple example, first build or
@@ -316,9 +316,9 @@ System Shared Memory

 A simple C++ example application using system shared memory at
 `src/clients/c++/api_v1/examples/simple\_shm\_client.cc
-`_
+`_
 and a Python version at
 `src/clients/python/api_v1/examples/simple\_shm\_client.py
-`_
+`_
 demonstrate the usage of shared memory with the client API.

 To run the C++ version of the simple system shared memory example, first
@@ -338,7 +338,7 @@ build or download it as described in
   15 - 1 = 14

 We have added a simple `system shared memory module
-`_
+`_
 that extends the Python client API to create, set and destroy system
 shared memory. To run the Python version of the simple system shared
 memory example, first build or download it as described in
@@ -352,9 +352,9 @@ CUDA Shared Memory

 A simple C++ example application using CUDA shared memory at
 `src/clients/c++/api_v1/examples/simple\_cuda\_shm\_client.cc
-`_
+`_
 and a Python version at
 `src/clients/python/api_v1/examples/simple\_shm\_client.py
-`_
+`_
 demonstrate the usage of shared memory with the client API.

 To run the C++ version of the simple CUDA shared memory example, first
@@ -374,7 +374,7 @@ $ simple_cuda_shm_client
   15 - 1 = 14

 We have added a simple `CUDA shared memory module
-`_
+`_
 that extends the Python client API to create, set and destroy CUDA
 shared memory. To run the Python version of the simple CUDA shared
 memory example, first build or download it as described in
@@ -395,9 +395,9 @@ SetRaw().

 String tensors are demonstrated in the C++ example application at
 `src/clients/c++/api_v1/examples/simple\_string\_client.cc
-`_
+`_
 and a Python version at
 `src/clients/python/api_v1/examples/simple\_string\_client.py
-`_.
+`_.

 .. _section-client-api-stateful-models:
@@ -421,10 +421,10 @@ async_run() methods in the Python API.

 The use of correlation ID and start and end flags are demonstrated in
 the C++ example application at
 `src/clients/c++/api_v1/examples/simple\_sequence\_client.cc
-`_
+`_
 and a Python version at
 `src/clients/python/api_v1/examples/simple\_sequence\_client.py
-`_.
+`_.

 Shape Tensor
 ^^^^^^^^^^^^
diff --git a/docs/client_example.rst b/docs/client_example.rst
index 3ecd9ca203..84f630af16 100644
--- a/docs/client_example.rst
+++ b/docs/client_example.rst
@@ -189,10 +189,10 @@ Image Classification Example Application

 The image classification example that uses the C++ client API is
 available at `src/clients/c++/api_v1/examples/image\_client.cc
-`_. The
+`_. The
 Python version of the image classification client is available at
 `src/clients/python/api_v1/examples/image\_client.py
-`_.
+`_.

 To use image\_client (or image\_client.py) you must first have a
 running inference server that is serving one or more image
@@ -289,7 +289,7 @@ on all images in the directory::

 The grpc\_image\_client.py application at available at
 `src/clients/python/api_v1/examples/grpc\_image\_client.py
-`_
+`_
 behaves the same as the image\_client except that instead of using
 the inference server client library it uses the GRPC generated client
 library to communicate with the server.
@@ -306,10 +306,10 @@
 to send the raw image binaries in the request and receive
 classification results without preprocessing the images on the client.

 The ensemble image classification example that uses the C++ client API
 is available at `src/clients/c++/api_v1/examples/ensemble\_image\_client.cc
-`_.
+`_.
 The Python version of the image classification client is available at
 `src/clients/python/api_v1/examples/ensemble\_image\_client.py
-`_.
+`_.

 To use ensemble\_image\_client (or ensemble\_image\_client.py) you
 must first have a running inference server that is serving the
@@ -382,7 +382,7 @@ Performance Measurement Application
 -----------------------------------

 The perf\_client application located at `src/clients/c++/perf\_client
-`_
+`_
 uses the C++ client API to send concurrent requests to the server to
 measure latency and inferences-per-second under varying client loads.
 See the :ref:`section-perf-client` for a full description.
diff --git a/docs/client_experimental.rst b/docs/client_experimental.rst
index 570f721638..427c28ba7d 100644
--- a/docs/client_experimental.rst
+++ b/docs/client_experimental.rst
@@ -30,35 +30,37 @@
 Experimental Client
 ===================

-Alpha versions of two Python client libraries is available that use
-the new HTTP/REST and GRPC protocol based on the `community standard
-inference protocols
-`_ that
-have been proposed by the `KFServing project
-`_. These Python libraries also
+Triton includes beta versions of the version 2 Python and C++ client
+libraries and examples. The libraries use the new HTTP/REST and GRPC
+`KFServing protocols
+`_ and also
 expose all the functionality expressed in the Triton `protocol
 extensions
-`_.
+`_.

-To try the new client libraries, first get the Python client library
-that uses the new protocol following directions in
-:ref:`section-getting-the-client-libraries`.
+To try the new client libraries and examples, first follow directions
+in :ref:`section-getting-the-client-libraries`.

-The Python GRPC client interface documentation is available at
-`src/clients/python/experimental\_api\_v2/library/grpcclient.py
-`_,
-`src/clients/python/experimental\_api\_v2/library/httpclient.py
-`_
-and in the API Reference.
+Several `examples
+`_
+demonstrate the new Python client library, and the code is documented
+in `grpcclient.py
+`_
+and `httpclient.py
+`_.

-Examples are available in
-`src/clients/python/experimental\_api\_v2/examples
-`_. The
-examples that start with **grpc_** use the `protoc compiler to
+Similarly, there are `C++ client examples
+`_
+and documentation in `grpc_client.h and http_client.h
+`_.
+
+The examples that start with **grpc_** use the `protoc compiler to
 generate the GRPC client stub
 `_, the
-examples that start with **simple_grpc_** use the Python GRPC client
-library, and the examples that start with **simple_http_** use the
-Python HTTP/REST client library.
+examples that start with **simple_grpc_** use the GRPC client library,
+and the examples that start with **simple_http_** use the HTTP/REST
+client library.

-For Triton to support the new HTTP/REST and GRPC protocols the server
-must be run with the -\\-api-version=2 flag.
+In the 20.03.1 release, the server must be run with the
+-\\-api-version=2 flag for Triton to support the new HTTP/REST and
+GRPC protocols. This requirement will be removed in the 20.06 and
+later releases.
diff --git a/docs/contribute.rst b/docs/contribute.rst
index 8e78d51483..92ba9845a1 100644
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@@ -31,7 +31,7 @@ Contributing

 Contributions to Triton Inference Server are more than welcome. To
 contribute make a pull request and follow the guidelines outlined in
 the `CONTRIBUTING
-`_
+`_
 document.

 Coding Convention
diff --git a/docs/faq.rst b/docs/faq.rst
index 92205acfd2..c89065da87 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -74,7 +74,7 @@ can possibly add another language if there is a need.

 We provide the GRPC API as a way to generate your own client library
 for a large number of languages. By following the official GRPC
 documentation and using `src/core/grpc\_service.proto
-`_
+`_
 you can generate language bindings for all the languages supported by
 GRPC. We provide two examples of this:
@@ -82,7 +82,7 @@ GRPC. We provide two examples of this:
 - Go:
   `https://github.com/NVIDIA/triton-inference-server/tree/master/src/clients/go`_.

 - Python:
-  `https://github.com/NVIDIA/triton-inference-server/blob/master/src/clients/python/api_v1/examples/grpc_image_client.py`_.
+  `https://github.com/NVIDIA/triton-inference-server/blob/master-v1/src/clients/python/api_v1/examples/grpc_image_client.py`_.

 In general the client libraries (and client examples) are meant to be
 just that, examples. We feel the client libraries are well written and
diff --git a/docs/library_api.rst b/docs/library_api.rst
index c66ad1aa9b..9f5a10101a 100644
--- a/docs/library_api.rst
+++ b/docs/library_api.rst
@@ -33,19 +33,19 @@ Library API

 The Triton Inference Server provides a backwards-compatible C API that
 allows the server to be linked directly into a C/C++ application. The
 API is documented in `trtserver.h
-`_
+`_
 as well as in the API section of the documentation.

 A simple example of the library API can be found at
 `src/servers/simple.cc
-`_. A
+`_. A
 more complicated example can be found in the files that make up the
 inference server executable, *tritonserver*. The tritonserver
 executable implements the HTTP and GRPC endpoints and uses the library
 API to communicate with the inference server. The primary files
 composing *tritonserver* are `src/servers/main.cc
-`_,
+`_,
 `src/servers/grpc_server.cc
-`_,
+`_,
 and `src/servers/http_server.cc
-`_.
+`_.
diff --git a/docs/model_repository.rst b/docs/model_repository.rst
index d29a5e86cd..15533727d1 100644
--- a/docs/model_repository.rst
+++ b/docs/model_repository.rst
@@ -203,7 +203,7 @@ TensorFlow saves trained models in one of two ways: *GraphDef* or
 have a trained model in TensorFlow, you can save it as a GraphDef
 directly or convert it to a GraphDef by using a script like
 `freeze_graph.py
-`_,
+`_,
 or save it as a SavedModel using a `SavedModelBuilder
 `_ or
 `tf.saved_model.simple_save
@@ -277,7 +277,7 @@
 files. By default the file or directory must be named model.onnx.
 Notice that some ONNX models may not be supported by the inference
 server as they are not supported by the underlying ONNX Runtime (due
 to either using `stale ONNX opset version
-`_
+`_
 or containing operators with `unsupported types
 `_).
@@ -401,7 +401,7 @@ Custom Backend API
 ^^^^^^^^^^^^^^^^^^

 A custom backend must implement the C interface defined in `custom.h
-`_. The
+`_. The
 interface is also documented in the API Reference.

 Example Custom Backend
diff --git a/docs/optimization.rst b/docs/optimization.rst
index 621d2f0696..7121bb4a3b 100644
--- a/docs/optimization.rst
+++ b/docs/optimization.rst
@@ -48,7 +48,7 @@ options, we will use a Caffe2 ResNet50 model that you can obtain by
 following the :ref:`section-quickstart`. As a baseline we use
 perf\_client to determine the performance of the model using a `basic
 model configuration that does not enable any performance features
-`_::
+`_::

   $ perf_client -m resnet50_netdef --percentile=95 --concurrency-range 1:4
   ...
@@ -218,7 +218,7 @@ will use an ONNX DenseNet model that you can obtain by following the
 :ref:`section-quickstart`. As a baseline we use perf\_client to
 determine the performance of the model using a `basic model
 configuration that does not enable any performance features
-`_::
+`_::

   $ perf_client -m densenet_onnx --percentile=95 --concurrency-range 1:4
   ...
@@ -281,7 +281,7 @@ we will use a TensorFlow Inception model that you can obtain by
 following the :ref:`section-quickstart`. As a baseline we use
 perf\_client to determine the performance of the model using a `basic
 model configuration that does not enable any performance features
-`_::
+`_::

   $ perf_client -m inception_graphdef --percentile=95 --concurrency-range 1:4
   ...
diff --git a/docs/trace.rst b/docs/trace.rst
index b78e3bbb96..6b1d367468 100644
--- a/docs/trace.rst
+++ b/docs/trace.rst
@@ -95,7 +95,7 @@ Trace Summary Tool
 ^^^^^^^^^^^^^^^^^^

 An example `trace summary tool
-`_
+`_
 can be used to summarize a set of traces collected from the inference
 server. Basic usage is::
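Tying the client-facing changes in this patch together: the client_experimental.rst update above notes that the beta V2 examples (simple_grpc_*, simple_http_*) use the new client libraries and that the server must run with --api-version=2 in 20.03.1. As a rough, hypothetical sketch only (the module name tritongrpcclient, the model name my_model, and the INPUT0/OUTPUT0 tensor names are illustrative assumptions, not taken from this patch), a V2 GRPC inference request might look like::

  import numpy as np
  import tritongrpcclient  # assumed name of the beta V2 GRPC client module

  # Connect to the server's GRPC endpoint (default port 8001). The server
  # is assumed to have been started with --api-version=2 as noted above.
  client = tritongrpcclient.InferenceServerClient(url="localhost:8001")

  # Describe a 2-element FP32 input tensor and attach data from numpy.
  data = np.array([1.0, 2.0], dtype=np.float32)
  infer_input = tritongrpcclient.InferInput("INPUT0", [2], "FP32")
  infer_input.set_data_from_numpy(data)

  # Request an output tensor and run the inference.
  output = tritongrpcclient.InferRequestedOutput("OUTPUT0")
  result = client.infer(model_name="my_model",
                        inputs=[infer_input],
                        outputs=[output])
  print(result.as_numpy("OUTPUT0"))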