From 9652cc7e16c9d1039ffafd2e1e46a8b1ca4c41e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Mart=C3=ADn=20Bl=C3=A1zquez?= Date: Mon, 8 Jan 2024 12:32:19 +0100 Subject: [PATCH] Add Vertex AI `LLM`s documentation (#222) --- .../technical-reference/llm/llmpool.py | 22 +++++++------ .../technical-reference/llm/processllm.py | 4 +-- .../llm/together_inference_generate.py | 2 +- .../llm/vertexaiendpointllm_generate.py | 26 +++++++++++++++ .../llm/vertexaillm_generate.py | 22 +++++++++++++ .../technical-reference/llm/vllm_generate.py | 2 +- .../pipeline/pipeline_llmpool_processllm_1.py | 1 + .../pipeline/pipeline_llmpool_processllm_2.py | 4 +++ .../pipeline/pipeline_llmpool_processllm_4.py | 4 +-- docs/technical-reference/llms.md | 32 +++++++++++++++++++ 10 files changed, 103 insertions(+), 16 deletions(-) create mode 100644 docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py create mode 100644 docs/snippets/technical-reference/llm/vertexaillm_generate.py diff --git a/docs/snippets/technical-reference/llm/llmpool.py b/docs/snippets/technical-reference/llm/llmpool.py index d7afc5fb43..4ae552072a 100644 --- a/docs/snippets/technical-reference/llm/llmpool.py +++ b/docs/snippets/technical-reference/llm/llmpool.py @@ -1,5 +1,6 @@ -from distilabel.tasks import TextGenerationTask, Task -from distilabel.llm import ProcessLLM, LLM, LLMPool +from distilabel.llm import LLM, LLMPool, ProcessLLM +from distilabel.tasks import Task, TextGenerationTask + def load_gpt_3(task: Task) -> LLM: from distilabel.llm import OpenAILLM @@ -10,6 +11,7 @@ def load_gpt_3(task: Task) -> LLM: num_threads=4, ) + def load_gpt_4(task: Task) -> LLM: from distilabel.llm import OpenAILLM @@ -20,13 +22,13 @@ def load_gpt_4(task: Task) -> LLM: ) -pool = LLMPool(llms=[ - ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3), - ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4), -]) -result = pool.generate( - inputs=[{"input": "Write a letter for Bob"}], num_generations=2 +pool = LLMPool( + llms=[ + ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_3), + ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4), + ] ) +result = pool.generate(inputs=[{"input": "Write a letter for Bob"}], num_generations=2) pool.teardown() # >>> print(result[0][0]["parsed_output"]["generations"], end="\n\n\n\n\n\n---->") # Dear Bob, @@ -34,6 +36,6 @@ def load_gpt_4(task: Task) -> LLM: # Life has been keeping me pretty busy lately. [Provide a brief overview of what you've been up to: work, school, family, hobbies, etc.] # I've often found myself reminiscing about the good old days, like when we [include a memorable moment or shared experience with Bob]. # >>> print(result[0][1]["parsed_output"]["generations"]) -# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional -# information including who "Bob" is, the subject matter of the letter, the tone (formal or informal), +# Of course, I'd be happy to draft a sample letter for you. However, I would need some additional +# information including who "Bob" is, the subject matter of the letter, the tone (formal or informal), # and any specific details or points you'd like to include. Please provide some more context and I'll do my best to assist you. 
diff --git a/docs/snippets/technical-reference/llm/processllm.py b/docs/snippets/technical-reference/llm/processllm.py index 6ee492f3fd..fee31d6d1d 100644 --- a/docs/snippets/technical-reference/llm/processllm.py +++ b/docs/snippets/technical-reference/llm/processllm.py @@ -1,5 +1,5 @@ -from distilabel.tasks import TextGenerationTask, Task -from distilabel.llm import ProcessLLM, LLM +from distilabel.llm import LLM, ProcessLLM +from distilabel.tasks import Task, TextGenerationTask def load_gpt_4(task: Task) -> LLM: diff --git a/docs/snippets/technical-reference/llm/together_inference_generate.py b/docs/snippets/technical-reference/llm/together_inference_generate.py index 9dab6c1c47..f0ad31bead 100644 --- a/docs/snippets/technical-reference/llm/together_inference_generate.py +++ b/docs/snippets/technical-reference/llm/together_inference_generate.py @@ -1,5 +1,5 @@ -from distilabel.tasks import TextGenerationTask from distilabel.llm import TogetherInferenceLLM +from distilabel.tasks import TextGenerationTask llm = TogetherInferenceLLM( model="togethercomputer/llama-2-70b-chat", diff --git a/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py b/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py new file mode 100644 index 0000000000..f6657b7d78 --- /dev/null +++ b/docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py @@ -0,0 +1,26 @@ +from distilabel.llm import VertexAIEndpointLLM +from distilabel.tasks import TextGenerationTask + +llm = VertexAIEndpointLLM( + task=TextGenerationTask(), + endpoint_id="3466410517680095232", + project="experiments-404412", + location="us-central1", + generation_kwargs={ + "temperature": 1.0, + "max_tokens": 128, + "top_p": 1.0, + "top_k": 10, + }, +) + +results = llm.generate( + inputs=[ + {"input": "Write a short summary about the Gemini astrological sign"}, + ], +) +# >>> print(results[0][0]["parsed_output"]["generations"]) +# Geminis are known for their curiosity, adaptability, and love of knowledge. They are +# also known for their tendency to be indecisive, impulsive and prone to arguing. They +# are ruled by the planet Mercury, which is associated with communication, quick thinking, +# and change. diff --git a/docs/snippets/technical-reference/llm/vertexaillm_generate.py b/docs/snippets/technical-reference/llm/vertexaillm_generate.py new file mode 100644 index 0000000000..fd954cc95e --- /dev/null +++ b/docs/snippets/technical-reference/llm/vertexaillm_generate.py @@ -0,0 +1,22 @@ +from distilabel.llm import VertexAILLM +from distilabel.tasks import TextGenerationTask + +llm = VertexAILLM( + task=TextGenerationTask(), model="gemini-pro", max_new_tokens=512, temperature=0.3 +) + +results = llm.generate( + inputs=[ + {"input": "Write a short summary about the Gemini astrological sign"}, + ], +) +# >>> print(results[0][0]["parsed_output"]["generations"]) +# Gemini, the third astrological sign in the zodiac, is associated with the element of +# air and is ruled by the planet Mercury. People born under the Gemini sign are often +# characterized as being intelligent, curious, and communicative. They are known for their +# quick wit, adaptability, and versatility. Geminis are often drawn to learning and enjoy +# exploring new ideas and concepts. They are also known for their social nature and ability +# to connect with others easily. However, Geminis can also be seen as indecisive, restless, +# and superficial at times. 
They may struggle with commitment and may have difficulty focusing
+# on one thing for too long. Overall, Geminis are known for their intelligence, curiosity,
+# and social nature.
diff --git a/docs/snippets/technical-reference/llm/vllm_generate.py b/docs/snippets/technical-reference/llm/vllm_generate.py
index 934abac3b1..ef1b216df5 100644
--- a/docs/snippets/technical-reference/llm/vllm_generate.py
+++ b/docs/snippets/technical-reference/llm/vllm_generate.py
@@ -1,5 +1,5 @@
-from distilabel.tasks import TextGenerationTask
 from distilabel.llm import vLLM
+from distilabel.tasks import TextGenerationTask
 from vllm import LLM
 
 llm = vLLM(
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
index d926e66d84..1ac5c3f3ec 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_1.py
@@ -4,6 +4,7 @@
 def load_notus(task: Task) -> LLM:  # (1)
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
index cd2fd2bc97..e91b29f710 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_2.py
@@ -4,6 +4,7 @@ def load_notus(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -20,6 +21,7 @@ def load_notus(task: Task) -> LLM:
 def load_zephyr(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -36,6 +38,7 @@ def load_zephyr(task: Task) -> LLM:
 def load_starling(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
@@ -52,6 +55,7 @@ def load_starling(task: Task) -> LLM:
 def load_neural_chat(task: Task) -> LLM:
     import os
+
     from distilabel.llm import vLLM
     from vllm import LLM
 
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
index 8d852b2dcb..70baa9088c 100644
--- a/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
+++ b/docs/snippets/technical-reference/pipeline/pipeline_llmpool_processllm_4.py
@@ -1,6 +1,6 @@
-from distilabel.tasks import UltraFeedbackTask
-from distilabel.pipeline import Pipeline
 from distilabel.llm import LLM, ProcessLLM
+from distilabel.pipeline import Pipeline
+from distilabel.tasks import UltraFeedbackTask
 
 
 def load_gpt_4(task: UltraFeedbackTask) -> LLM:
diff --git a/docs/technical-reference/llms.md b/docs/technical-reference/llms.md
index 71803674dc..85d15df39e 100644
--- a/docs/technical-reference/llms.md
+++ b/docs/technical-reference/llms.md
@@ -171,6 +171,38 @@ See their release post with more details at [Announcing Together Inference Engin
 --8<-- "docs/snippets/technical-reference/llm/together_inference_generate.py"
 ```
 
+### Vertex AI LLMs
+
+The Google Cloud Vertex AI platform allows using Google proprietary models and deploying other models for online prediction. `distilabel` integrates with Vertex AI through the `VertexAILLM` and `VertexAIEndpointLLM` classes.
+
+To use one of these classes, you will need to have Google Cloud authentication configured using one of these methods:
+
+- Setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable
+- Running the `gcloud auth application-default login` command
+- Calling the `vertexai.init` function from the `google-cloud-aiplatform` Python SDK before instantiating the `LLM`, as shown in the sketch below
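+
+For instance, the third method comes down to a single call made before creating the `LLM`. A minimal sketch (the project ID and location below are placeholders, not values from this PR):
+
+```python
+import vertexai  # shipped with the `google-cloud-aiplatform` library
+
+# Placeholder values: use your own GCP project ID and a region where
+# Vertex AI is available.
+vertexai.init(project="my-gcp-project", location="us-central1")
+```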
+
+
+#### Proprietary models (Gemini and PaLM)
+
+`VertexAILLM` allows using Google proprietary models such as Gemini and PaLM. These models are served through Vertex AI and its different APIs:
+
+- **Gemini API**: offers models from the Gemini family such as `gemini-pro` and `gemini-pro-vision`. More information: [Vertex AI - Gemini API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini).
+- **Text Generation API**: offers models from the PaLM family such as `text-bison`. More information: [Vertex AI - PaLM 2 for text](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions).
+- **Code Generation API**: offers models from the PaLM family for code generation such as `code-bison`. More information: [Vertex AI - Codey for code generation](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation).
+
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/vertexaillm_generate.py"
+```
+
+#### Endpoints for online prediction
+
+The `VertexAIEndpointLLM` class allows using a model deployed in a Vertex AI Endpoint for online prediction to generate text. Unlike the rest of the `LLM` classes, which come with a set of predefined arguments in their `__init__` method, `VertexAIEndpointLLM` requires the generation arguments to be provided as a dictionary that will be passed to the `generation_kwargs` argument. This is because the generation parameters differ in name and availability depending on the Docker image deployed on the Vertex AI Endpoint.
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/vertexaiendpointllm_generate.py"
+```
+
 ## `ProcessLLM` and `LLMPool`
 
-By default, `distilabel` uses a single process, so the generation loop is usually bottlenecked by the model inference time and Python GIL. To overcome this limitation, we provide the `ProcessLLM` class that allows to load an `LLM` in a different process, avoiding the GIL and allowing to parallelize the generation loop. Creating a `ProcessLLM` is easy as:
+By default, `distilabel` uses a single process, so the generation loop is usually bottlenecked by the model inference time and the Python GIL. To overcome this limitation, we provide the `ProcessLLM` class, which allows loading an `LLM` in a different process, avoiding the GIL and allowing the generation loop to be parallelized. Creating a `ProcessLLM` is as easy as:
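+
+A minimal sketch of what that creation looks like, reusing a `load_gpt_4` loader in the style of the `processllm.py` and `llmpool.py` snippets above (an OpenAI API key is assumed to be configured in the environment):
+
+```python
+from distilabel.llm import LLM, ProcessLLM
+from distilabel.tasks import Task, TextGenerationTask
+
+
+def load_gpt_4(task: Task) -> LLM:
+    # Imported inside the function so the import happens in the child
+    # process where the `LLM` will actually live.
+    from distilabel.llm import OpenAILLM
+
+    return OpenAILLM(
+        model="gpt-4",
+        task=task,
+        num_threads=4,
+    )
+
+
+# `load_llm_fn` is executed in a separate process, so the generation loop
+# of the returned `LLM` does not compete for the parent process GIL.
+llm = ProcessLLM(task=TextGenerationTask(), load_llm_fn=load_gpt_4)
+```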