Add Vertex AI LLMs documentation (#222)
gabrielmbmb authored Jan 8, 2024
1 parent 3c5fb70 commit 9652cc7
Showing 10 changed files with 103 additions and 16 deletions.
22 changes: 12 additions & 10 deletions docs/snippets/technical-reference/llm/llmpool.py
@@ -1,5 +1,6 @@
-from distilabel.tasks import TextGenerationTask, Task
-from distilabel.llm import ProcessLLM, LLM, LLMPool
+from distilabel.llm import LLM, LLMPool, ProcessLLM
+from distilabel.tasks import Task, TextGenerationTask


def load_gpt_3(task: Task) -> LLM:
from distilabel.llm import OpenAILLM
Expand All @@ -10,6 +11,7 @@ def load_gpt_3(task: Task) -> LLM:
num_threads=4,
)


def load_gpt_4(task: Task) -> LLM:
from distilabel.llm import OpenAILLM

Expand All @@ -20,20 +22,20 @@ def load_gpt_4(task: Task) -> LLM:
)


-pool = LLMPool(llms=[
-    ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3),
-    ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4),
-])
-result = pool.generate(
-    inputs=[{"input": "Write a letter for Bob"}], num_generations=2
-)
+pool = LLMPool(
+    llms=[
+        ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3),
+        ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4),
+    ]
+)
+result = pool.generate(inputs=[{"input": "Write a letter for Bob"}], num_generations=2)
pool.teardown()
# >>> print(result[0][0]["parsed_output"]["generations"], end="\n\n\n\n\n\n---->")
# Dear Bob,
# I hope this letter finds you in good health and high spirits. I know it's been a while since we last caught up, and I wanted to take the time to connect and share a few updates.
# Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.]
# I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob].
# >>> print(result[0][1]["parsed_output"]["generations"])
# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional
# information including who "Bob" is, the subject matter of the letter, the tone (formal or informal),
# and any specific details or points you'd like to include. Please provide some more context and I'll do my best to assist you.
4 changes: 2 additions & 2 deletions docs/snippets/technical-reference/llm/processllm.py
@@ -1,5 +1,5 @@
-from distilabel.tasks import TextGenerationTask, Task
-from distilabel.llm import ProcessLLM, LLM
+from distilabel.llm import LLM, ProcessLLM
+from distilabel.tasks import Task, TextGenerationTask


def load_gpt_4(task: Task) -> LLM:
4 changes: 2 additions & 2 deletions docs/snippets/technical-reference/llm/together_inference_generate.py
@@ -1,5 +1,5 @@
-from distilabel.tasks import TextGenerationTask
 from distilabel.llm import TogetherInferenceLLM
+from distilabel.tasks import TextGenerationTask

llm = TogetherInferenceLLM(
model="togethercomputer/llama-2-70b-chat",
26 changes: 26 additions & 0 deletions docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py
@@ -0,0 +1,26 @@
from distilabel.llm import VertexAIEndpointLLM
from distilabel.tasks import TextGenerationTask

llm = VertexAIEndpointLLM(
task=TextGenerationTask(),
endpoint_id="3466410517680095232",
project="experiments-404412",
location="us-central1",
generation_kwargs={
"temperature": 1.0,
"max_tokens": 128,
"top_p": 1.0,
"top_k": 10,
},
)

results = llm.generate(
inputs=[
{"input": "Write a short summary about the Gemini astrological sign"},
],
)
# >>> print(results[0][0]["parsed_output"]["generations"])
# Geminis are known for their curiosity, adaptability, and love of knowledge. They are
# also known for their tendency to be indecisive, impulsive and prone to arguing. They
# are ruled by the planet Mercury, which is associated with communication, quick thinking,
# and change.
22 changes: 22 additions & 0 deletions docs/snippets/technical-reference/llm/vertexaillm_generate.py
@@ -0,0 +1,22 @@
from distilabel.llm import VertexAILLM
from distilabel.tasks import TextGenerationTask

llm = VertexAILLM(
task=TextGenerationTask(), model="gemini-pro", max_new_tokens=512, temperature=0.3
)

results = llm.generate(
inputs=[
{"input": "Write a short summary about the Gemini astrological sign"},
],
)
# >>> print(results[0][0]["parsed_output"]["generations"])
# Gemini, the third astrological sign in the zodiac, is associated with the element of
# air and is ruled by the planet Mercury. People born under the Gemini sign are often
# characterized as being intelligent, curious, and communicative. They are known for their
# quick wit, adaptability, and versatility. Geminis are often drawn to learning and enjoy
# exploring new ideas and concepts. They are also known for their social nature and ability
# to connect with others easily. However, Geminis can also be seen as indecisive, restless,
# and superficial at times. They may struggle with commitment and may have difficulty focusing
# on one thing for too long. Overall, Geminis are known for their intelligence, curiosity,
# and social nature.
2 changes: 1 addition & 1 deletion docs/snippets/technical-reference/llm/vllm_generate.py
@@ -1,5 +1,5 @@
-from distilabel.tasks import TextGenerationTask
 from distilabel.llm import vLLM
+from distilabel.tasks import TextGenerationTask
from vllm import LLM

llm = vLLM(
@@ -4,6 +4,7 @@

def load_notus(task: Task) -> LLM: # (1)
import os

from distilabel.llm import vLLM
from vllm import LLM

@@ -4,6 +4,7 @@

def load_notus(task: Task) -> LLM:
import os

from distilabel.llm import vLLM
from vllm import LLM

Expand All @@ -20,6 +21,7 @@ def load_notus(task: Task) -> LLM:

def load_zephyr(task: Task) -> LLM:
import os

from distilabel.llm import vLLM
from vllm import LLM

Expand All @@ -36,6 +38,7 @@ def load_zephyr(task: Task) -> LLM:

def load_starling(task: Task) -> LLM:
import os

from distilabel.llm import vLLM
from vllm import LLM

Expand All @@ -52,6 +55,7 @@ def load_starling(task: Task) -> LLM:

def load_neural_chat(task: Task) -> LLM:
import os

from distilabel.llm import vLLM
from vllm import LLM

@@ -1,6 +1,6 @@
-from distilabel.tasks import UltraFeedbackTask
-from distilabel.pipeline import Pipeline
 from distilabel.llm import LLM, ProcessLLM
+from distilabel.pipeline import Pipeline
+from distilabel.tasks import UltraFeedbackTask


def load_gpt_4(task: UltraFeedbackTask) -> LLM:
32 changes: 32 additions & 0 deletions docs/technical-reference/llms.md
@@ -171,6 +171,38 @@ See their release post with more details at [Announcing Together Inference Engin
--8<-- "docs/snippets/technical-reference/llm/together_inference_generate.py"
```

### Vertex AI LLMs

The Google Cloud Vertex AI platform lets you use Google's proprietary models and deploy other models for online prediction. `distilabel` integrates with Vertex AI through the `VertexAILLM` and `VertexAIEndpointLLM` classes.

To use either of these classes, you first need to configure Google Cloud authentication using one of the following methods:

- Setting the `GOOGLE_CLOUD_CREDENTIALS` environment variable
- Running the `gcloud auth application-default login` command
- Calling the `vertexai.init` function from the `google-cloud-aiplatform` Python SDK before instantiating the `LLM`

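For the first and third options, a minimal sketch (the key-file path, project ID, and location below are placeholders, not values from this repository):

```python
import os

# Option 1: point the integration at a service-account key file through the
# environment variable listed above (the path is a placeholder).
os.environ["GOOGLE_CLOUD_CREDENTIALS"] = "/path/to/service-account.json"

# Option 3: initialize the SDK explicitly before instantiating the `LLM`.
# Requires the `google-cloud-aiplatform` package, so it is left commented out:
# import vertexai
# vertexai.init(project="my-gcp-project", location="us-central1")

print(os.environ["GOOGLE_CLOUD_CREDENTIALS"])
```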

#### Proprietary models (Gemini and PaLM)

`VertexAILLM` lets you use Google's proprietary models such as Gemini and PaLM. These models are served through Vertex AI via its different APIs:

- **Gemini API**: offers models from the Gemini family, such as `gemini-pro` and `gemini-pro-vision`. More information: [Vertex AI - Gemini API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini).
- **Text Generation API**: offers models from the PaLM family, such as `text-bison`. More information: [Vertex AI - PaLM 2 for text](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions).
- **Code Generation API**: offers models from the PaLM family for code generation, such as `code-bison`. More information: [Vertex AI - Codey for code generation](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation).


```python
--8<-- "docs/snippets/technical-reference/llm/vertexaillm_generate.py"
```

#### Endpoints for online prediction

The `VertexAIEndpointLLM` class lets you generate text with a model deployed to a Vertex AI Endpoint for online prediction. Unlike the rest of the `LLM` classes, which come with a set of predefined arguments in their `__init__` method, `VertexAIEndpointLLM` requires the generation arguments to be provided as a dictionary passed to the `generation_kwargs` argument. This is because the generation parameters differ, and have different names, depending on the Docker image deployed to the Vertex AI Endpoint.

```python
--8<-- "docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py"
```
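Both snippets index into the value returned by `generate`, which is a nested list: one inner list per input, and one entry per generation inside it. A minimal sketch with a mocked result (no Vertex AI call is made; the strings are placeholders):

```python
# Mocked return value of `generate` for one input and two generations,
# mirroring the shape the snippets above index into.
results = [
    [
        {"parsed_output": {"generations": "First generation for input 0"}},
        {"parsed_output": {"generations": "Second generation for input 0"}},
    ],
]

# results[i][j] holds the j-th generation for the i-th input.
first = results[0][0]["parsed_output"]["generations"]
second = results[0][1]["parsed_output"]["generations"]
print(first)   # First generation for input 0
print(second)  # Second generation for input 0
```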

## `ProcessLLM` and `LLMPool`

By default, `distilabel` uses a single process, so the generation loop is usually bottlenecked by the model inference time and the Python GIL. To overcome this limitation, we provide the `ProcessLLM` class, which loads an `LLM` in a separate process, avoiding the GIL and allowing the generation loop to be parallelized. Creating a `ProcessLLM` is as easy as:
