From 9652cc7e16c9d1039ffafd2e1e46a8b1ca4c41e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Mart=C3=ADn=20Bl=C3=A1zquez?= Date: Mon, 8 Jan 2024 12:32:19 +0100 Subject: [PATCH] Add Vertex AI `LLM`s documentation (#222) --- .../technical-reference/llm/llmpool.py | 22 +++++++------ .../technical-reference/llm/processllm.py | 4 +-- .../llm/together_inference_generate.py | 2 +- .../llm/vertexaiendpointllm_generate.py | 26 +++++++++++++++ .../llm/vertexaillm_generate.py | 22 +++++++++++++ .../technical-reference/llm/vllm_generate.py | 2 +- .../pipeline/pipeline_llmpool_processllm_1.py | 1 + .../pipeline/pipeline_llmpool_processllm_2.py | 4 +++ .../pipeline/pipeline_llmpool_processllm_4.py | 4 +-- docs/technical-reference/llms.md | 32 +++++++++++++++++++ 10 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py create mode 100644 docs/snippets/technical-reference/llm/vertexaillm_generate.py diff --git a/docs/snippets/technical-reference/llm/llmpool.py b/docs/snippets/technical-reference/llm/llmpool.py index d7afc5fb43..4ae552072a 100644 --- a/docs/snippets/technical-reference/llm/llmpool.py +++ b/docs/snippets/technical-reference/llm/llmpool.py @@ -1,5 +1,6 @@ -from distilabel.tasks import TextGenerationTask, Task -from distilabel.llm import ProcessLLM, LLM, LLMPool +from distilabel.llm import LLM, LLMPool, ProcessLLM +from distilabel.tasks import Task, TextGenerationTask + def load_gpt_3(task: Task) -> LLM: from distilabel.llm import OpenAILLM @@ -10,6 +11,7 @@ def load_gpt_3(task: Task) -> LLM: num_threads=4, ) + def load_gpt_4(task: Task) -> LLM: from distilabel.llm import OpenAILLM @@ -20,13 +22,13 @@ def load_gpt_4(task: Task) -> LLM: ) -pool = LLMPool(llms=[ - ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3), - ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4), -]) -result = pool.generate( - inputs=[{"input": "Write a letter for Bob"}], num_generations=2 +pool = LLMPool( + llms=[ + ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3), + ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4), + ] ) +result = pool.generate(inputs=[{"input": "Write a letter for Bob"}], num_generations=2) pool.teardown() # >>> print(result[0][0]["parsed_output"]["generations"], end="\n\n\n\n\n\n---->") # Dear Bob, @@ -34,6 +36,6 @@ def load_gpt_4(task: Task) -> LLM: # Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.] # I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob]. # >>> print(result[0][1]["parsed_output"]["generations"]) -# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional -# information including who "Bob" is, the subject matter of the letter, the tone (formal or informal), +# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional +# information including who "Bob" is, the subject matter of the letter, the tone (formal or informal), # and any specific details or points you'd like to include. Please provide some more context and I'll do my best to assist you. 
diff --git a/docs/snippets/technical-reference/llm/processllm.py b/docs/snippets/technical-reference/llm/processllm.py index 6ee492f3fd..fee31d6d1d 100644 --- a/docs/snippets/technical-reference/llm/processllm.py +++ b/docs/snippets/technical-reference/llm/processllm.py @@ -1,5 +1,5 @@ -from distilabel.tasks import TextGenerationTask, Task -from distilabel.llm import ProcessLLM, LLM +from distilabel.llm import LLM, ProcessLLM +from distilabel.tasks import Task, TextGenerationTask def load_gpt_4(task: Task) -> LLM: diff --git a/docs/snippets/technical-reference/llm/together_inference_generate.py b/docs/snippets/technical-reference/llm/together_inference_generate.py index 9dab6c1c47..f0ad31bead 100644 --- a/docs/snippets/technical-reference/llm/together_inference_generate.py +++ b/docs/snippets/technical-reference/llm/together_inference_generate.py @@ -1,5 +1,5 @@ -from distilabel.tasks import TextGenerationTask from distilabel.llm import TogetherInferenceLLM +from distilabel.tasks import TextGenerationTask llm = TogetherInferenceLLM( model="togethercomputer/llama-2-70b-chat", diff --git a/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py b/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py new file mode 100644 index 0000000000..f6657b7d78 --- /dev/null +++ b/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py @@ -0,0 +1,26 @@ +from distilabel.llm import VertexAIEndpointLLM +from distilabel.tasks import TextGenerationTask + +llm = VertexAIEndpointLLM( + task=TextGenerationTask(), + endpoint_id="3466410517680095232", + project="experiments-404412", + location="us-central1", + generation_kwargs={ + "temperature": 1.0, + "max_tokens": 128, + "top_p": 1.0, + "top_k": 10, + }, +) + +results = llm.generate( + inputs=[ + {"input": "Write a short summary about the Gemini astrological sign"}, + ], +) +# >>> print(results[0][0]["parsed_output"]["generations"]) +# Geminis are known for their curiosity, adaptability, and love of knowledge. They are +# also known for their tendency to be indecisive, impulsive and prone to arguing. They +# are ruled by the planet Mercury, which is associated with communication, quick thinking, +# and change. diff --git a/docs/snippets/technical-reference/llm/vertexaillm_generate.py b/docs/snippets/technical-reference/llm/vertexaillm_generate.py new file mode 100644 index 0000000000..fd954cc95e --- /dev/null +++ b/docs/snippets/technical-reference/llm/vertexaillm_generate.py @@ -0,0 +1,22 @@ +from distilabel.llm import VertexAILLM +from distilabel.tasks import TextGenerationTask + +llm = VertexAILLM( + task=TextGenerationTask(), model="gemini-pro", max_new_tokens=512, temperature=0.3 +) + +results = llm.generate( + inputs=[ + {"input": "Write a short summary about the Gemini astrological sign"}, + ], +) +# >>> print(results[0][0]["parsed_output"]["generations"]) +# Gemini, the third astrological sign in the zodiac, is associated with the element of +# air and is ruled by the planet Mercury. People born under the Gemini sign are often +# characterized as being intelligent, curious, and communicative. They are known for their +# quick wit, adaptability, and versatility. Geminis are often drawn to learning and enjoy +# exploring new ideas and concepts. They are also known for their social nature and ability +# to connect with others easily. However, Geminis can also be seen as indecisive, restless, +# and superficial at times. 
They may struggle with commitment and may have difficulty focusing
+# on one thing for too long. Overall, Geminis are known for their intelligence, curiosity,
+# and social nature.
diff --git a/docs/snippets/technical-reference/llm/vllm_generate.py b/docs/snippets/technical-reference/llm/vllm_generate.py
index 934abac3b1..ef1b216df5 100644
--- a/docs/snippets/technical-reference/llm/vllm_generate.py
+++ b/docs/snippets/technical-reference/llm/vllm_generate.py
@@ -1,5 +1,5 @@
-from distilabel.tasks import TextGenerationTask
 from distilabel.llm import vLLM
+from distilabel.tasks import TextGenerationTask
 from vllm import LLM
 
 llm = vLLM(
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
index d926e66d84..1ac5c3f3ec 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
@@ -4,6 +4,7 @@
 def load_notus(task: Task) -> LLM:  # (1)
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
index cd2fd2bc97..e91b29f710 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
@@ -4,6 +4,7 @@ def load_notus(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -20,6 +21,7 @@ def load_notus(task: Task) -> LLM:
 def load_zephyr(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -36,6 +38,7 @@ def load_zephyr(task: Task) -> LLM:
 def load_starling(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -52,6 +55,7 @@ def load_starling(task: Task) -> LLM:
 def load_neural_chat(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
index 8d852b2dcb..70baa9088c 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
@@ -1,6 +1,6 @@
-from distilabel.tasks import UltraFeedbackTask
-from distilabel.pipeline import Pipeline
 from distilabel.llm import LLM, ProcessLLM
+from distilabel.pipeline import Pipeline
+from distilabel.tasks import UltraFeedbackTask
 
 
 def load_gpt_4(task: UltraFeedbackTask) -> LLM:
diff --git a/docs/technical-reference/llms.md b/docs/technical-reference/llms.md
index 71803674dc..85d15df39e 100644
--- a/docs/technical-reference/llms.md
+++ b/docs/technical-reference/llms.md
@@ -171,6 +171,38 @@ See their release post with more details at [Announcing Together Inference Engin
 --8<-- "docs/snippets/technical-reference/llm/together_inference_generate.py"
 ```
 
+### Vertex AI LLMs
+
+The Google Cloud Vertex AI platform allows using Google proprietary models and deploying other models for online prediction. `distilabel` integrates with Vertex AI through the `VertexAILLM` and `VertexAIEndpointLLM` classes.
+
+To use one of these classes, you will need to have Google Cloud authentication configured using one of these methods:
+
+- Setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable
+- Running the `gcloud auth application-default login` command
+- Calling the `vertexai.init` function from the `google-cloud-aiplatform` Python SDK before instantiating the `LLM`, as shown in the sketch below
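+
+For instance, the third method comes down to a single call made before creating the `LLM`. A minimal sketch (the project ID and location below are placeholders, not values from this PR):
+
+```python
+import vertexai  # shipped with the `google-cloud-aiplatform` library
+
+# Placeholder values: use your own GCP project ID and a region where
+# Vertex AI is available.
+vertexai.init(project="my-gcp-project", location="us-central1")
+```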
+
+
+#### Proprietary models (Gemini and PaLM)
+
+`VertexAILLM` allows using Google proprietary models such as Gemini and PaLM. These models are served through Vertex AI and its different APIs:
+
+- **Gemini API**: offers models from the Gemini family such as `gemini-pro` and `gemini-pro-vision`. More information: [Vertex AI - Gemini API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini).
+- **Text Generation API**: offers models from the PaLM family such as `text-bison`. More information: [Vertex AI - PaLM 2 for text](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions).
+- **Code Generation API**: offers models from the PaLM family for code generation such as `code-bison`. More information: [Vertex AI - Codey for code generation](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation).
+
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/vertexaillm_generate.py"
+```
+
+#### Endpoints for online prediction
+
+The `VertexAIEndpointLLM` class allows using a model deployed in a Vertex AI Endpoint for online prediction to generate text. Unlike the rest of the `LLM` classes, which come with a set of predefined arguments in their `__init__` method, `VertexAIEndpointLLM` requires the generation arguments to be provided as a dictionary that will be passed to the `generation_kwargs` argument. This is because the generation parameters differ in name and availability depending on the Docker image deployed on the Vertex AI Endpoint.
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py"
+```
+
 ## `ProcessLLM` and `LLMPool`
 
-By default, `distilabel` uses a single process, so the generation loop is usually bottlenecked by the model inference time and Python GIL. To overcome this limitation, we provide the `ProcessLLM` class that allows to load an `LLM` in a different process, avoiding the GIL and allowing to parallelize the generation loop. Creating a `ProcessLLM` is easy as:
+By default, `distilabel` uses a single process, so the generation loop is usually bottlenecked by the model inference time and the Python GIL. To overcome this limitation, we provide the `ProcessLLM` class, which allows loading an `LLM` in a different process, avoiding the GIL and allowing the generation loop to be parallelized. Creating a `ProcessLLM` is as easy as:
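+
+A minimal sketch of what that creation looks like, reusing a `load_gpt_4` loader in the style of the `processllm.py` and `llmpool.py` snippets above (an OpenAI API key is assumed to be configured in the environment):
+
+```python
+from distilabel.llm import LLM, ProcessLLM
+from distilabel.tasks import Task, TextGenerationTask
+
+
+def load_gpt_4(task: Task) -> LLM:
+    # Imported inside the function so the import happens in the child
+    # process where the `LLM` will actually live.
+    from distilabel.llm import OpenAILLM
+
+    return OpenAILLM(
+        model="gpt-4",
+        task=task,
+        num_threads=4,
+    )
+
+
+# `load_llm_fn` is executed in a separate process, so the generation loop
+# of the returned `LLM` does not compete for the parent process GIL.
+llm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4)
+```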