diff --git a/docs/assets/logo.svg b/docs/assets/logo.svg
new file mode 100644
index 0000000000..e2cba9d3bc
--- /dev/null
+++ b/docs/assets/logo.svg
@@ -0,0 +1,42 @@
+<!-- distilabel logo (42 lines of SVG markup omitted) -->
diff --git a/docs/guides.md b/docs/guides.md
deleted file mode 100644
index a79545bf93..0000000000
--- a/docs/guides.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Guides
-
-This page is still WIP, stay tuned!
diff --git a/docs/index.md b/docs/index.md
index 6be6af1640..39e4ac5588 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -39,3 +39,33 @@ will create a `labeller` LLM using `OpenAILLM` with the `UltraFeedback` task for
 For a more complete example, check out our awesome notebook on Google Colab:
 
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1rO1-OlLFPBC0KPuXQOeMpZOeajiwNoMy?usp=sharing)
+
+## Navigation
+
+<div class="grid cards" markdown>
+
+-   [**Tutorials**](./learn/tutorials/)
+
+    ---
+
+    End to end project lessons.
+
+-   [**User Guides**](./learn/user-guides/)
+
+    ---
+
+    Practical guides to achieve specific tasks with `distilabel`.
+
+-   [**Concept Guides**](./technical-reference/llms.md)
+
+    ---
+
+    Understand the components and their interactions.
+
+-   [**API Reference**](./reference/distilabel)
+
+    ---
+
+    Technical description of the classes and functions.
+
+</div>
\ No newline at end of file
diff --git a/docs/learn/index.md b/docs/learn/index.md
new file mode 100644
index 0000000000..6ecb423ba4
--- /dev/null
+++ b/docs/learn/index.md
@@ -0,0 +1,3 @@
+# Learn
+
+This section is the guide to using `distilabel`. It contains tutorials and guides that delve into the technical aspects of the library.
diff --git a/docs/learn/tutorials/index.md b/docs/learn/tutorials/index.md
new file mode 100644
index 0000000000..4b46d3e620
--- /dev/null
+++ b/docs/learn/tutorials/index.md
@@ -0,0 +1,6 @@
+# Tutorials
+
+!!! warning "🚧 Work in Progress"
+    This page is a work in progress.
+
+This section will guide you step by step through creating different dataset types with `distilabel`.
diff --git a/docs/learn/user-guides/index.md b/docs/learn/user-guides/index.md
new file mode 100644
index 0000000000..62dd65bf19
--- /dev/null
+++ b/docs/learn/user-guides/index.md
@@ -0,0 +1,6 @@
+# User guides
+
+!!! warning "🚧 Work in Progress"
+    This page is a work in progress.
+
+This section explains the main components of `distilabel`.
diff --git a/docs/snippets/technical-reference/llm/inference_endpoint_generate.py b/docs/snippets/technical-reference/llm/inference_endpoint_generate.py
new file mode 100644
index 0000000000..56acc68a14
--- /dev/null
+++ b/docs/snippets/technical-reference/llm/inference_endpoint_generate.py
@@ -0,0 +1,24 @@
+import os
+
+from distilabel.llm import InferenceEndpointsLLM
+from distilabel.tasks import TextGenerationTask
+
+# Prefer the environment variables, falling back to the example endpoint:
+endpoint_name = os.getenv("HF_INFERENCE_ENDPOINT_NAME") or "aws-notus-7b-v1-4052"
+endpoint_namespace = os.getenv("HF_NAMESPACE") or "argilla"
+token = os.getenv("HF_TOKEN")  # hf_...
+
+llm = InferenceEndpointsLLM(
+    endpoint_name=endpoint_name,
+    endpoint_namespace=endpoint_namespace,
+    token=token,
+    task=TextGenerationTask(),
+    max_new_tokens=512,
+    prompt_format="notus",
+)
+result = llm.generate([{"input": "What are critique LLMs?"}])
+# print(result[0][0]["parsed_output"]["generations"])
+# Critique LLMs (Long Land Moore Machines) are artificial intelligence models designed specifically for analyzing and evaluating the quality or worth of a particular subject or object. These models can be trained on a large dataset of reviews, ratings, or commentary related to a product, service, artwork, or any other topic of interest.
+# The training data can include both positive and negative feedback, helping the LLM to understand the nuanced aspects of quality and value. The model uses natural language processing (NLP) techniques to extract meaningful insights, including sentiment analysis, entity recognition, and text classification.
+# Once the model is trained, it can be used to analyze new input data and provide a critical assessment based on its learned understanding of quality and value. For example, a critique LLM for movies could evaluate a new film and generate a detailed review highlighting its strengths, weaknesses, and overall rating.
+# Critique LLMs are becoming increasingly useful in various industries, such as e-commerce, education, and entertainment, where they can provide objective and reliable feedback to help guide decision-making processes. They can also aid in content optimization by highlighting areas of improvement or recommending strategies for enhancing user engagement.
+# In summary, critique LLMs are powerful tools for analyzing and evaluating the quality or worth of different subjects or objects, helping individuals and organizations make informed decisions with confidence.
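+
+# NOTE: each element of `result` is a list with one `LLMOutput`-like dict per
+# generation. Assuming the usual fields (`model_name`, `prompt_used`,
+# `raw_output`, `parsed_output`), the unparsed response can be inspected with:
+# print(result[0][0]["raw_output"])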
diff --git a/docs/snippets/technical-reference/llm/llamacpp_generate.py b/docs/snippets/technical-reference/llm/llamacpp_generate.py
new file mode 100644
index 0000000000..78dbfef7fa
--- /dev/null
+++ b/docs/snippets/technical-reference/llm/llamacpp_generate.py
@@ -0,0 +1,19 @@
+from distilabel.llm import LlamaCppLLM
+from distilabel.tasks import TextGenerationTask
+from llama_cpp import Llama
+
+# Instantiate our LLM with the local llama.cpp model:
+llm = LlamaCppLLM(
+    model=Llama(model_path="./notus-7b-v1.q4_k_m.gguf", n_gpu_layers=-1),
+    task=TextGenerationTask(),
+    max_new_tokens=128,
+    temperature=0.3,
+    prompt_format="notus",
+)
+
+result_llamacpp = llm.generate([{"input": "What is the capital of Spain?"}])
+# >>> print(result_llamacpp[0][0]["parsed_output"]["generations"])
+# The capital of Spain is Madrid. It is located in the center of the country and
+# is known for its vibrant culture, beautiful architecture, and delicious food.
+# Madrid is home to many famous landmarks such as the Prado Museum, Retiro Park,
+# and the Royal Palace of Madrid. I hope this information helps!
diff --git a/docs/snippets/technical-reference/llm/openai_generate.py b/docs/snippets/technical-reference/llm/openai_generate.py
new file mode 100644
index 0000000000..16d827712a
--- /dev/null
+++ b/docs/snippets/technical-reference/llm/openai_generate.py
@@ -0,0 +1,18 @@
+import os
+
+from distilabel.llm import OpenAILLM
+from distilabel.tasks import OpenAITextGenerationTask
+
+openaillm = OpenAILLM(
+    model="gpt-3.5-turbo",
+    task=OpenAITextGenerationTask(),
+    max_new_tokens=256,
+    num_threads=2,
+    openai_api_key=os.environ.get("OPENAI_API_KEY"),
+    temperature=0.3,
+)
+result_openai = openaillm.generate([{"input": "What is OpenAI?"}])
+# >>> result_openai  # a list of futures, since `OpenAILLM` is thread-based
+# [<Future at 0x...>]
+# >>> result_openai[0].result()[0][0]["parsed_output"]["generations"]
+# 'OpenAI is an artificial intelligence research organization that aims to ensure that artificial general intelligence (AGI) benefits all of humanity. AGI refers to highly autonomous systems that outperform humans at most economically valuable work. OpenAI conducts research, develops AI technologies, and promotes the responsible and safe use of AI. They also work on projects to make AI more accessible and beneficial to society. OpenAI is committed to transparency, cooperation, and avoiding uses of AI that could harm humanity or concentrate power in the wrong hands.'
diff --git a/docs/snippets/technical-reference/llm/transformers_generate.py b/docs/snippets/technical-reference/llm/transformers_generate.py
new file mode 100644
index 0000000000..ded93c6672
--- /dev/null
+++ b/docs/snippets/technical-reference/llm/transformers_generate.py
@@ -0,0 +1,17 @@
+from distilabel.llm import TransformersLLM
+from distilabel.tasks import TextGenerationTask
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load the model and tokenizer from the Hugging Face Hub:
+tokenizer = AutoTokenizer.from_pretrained("argilla/notus-7b-v1")
+model = AutoModelForCausalLM.from_pretrained("argilla/notus-7b-v1")
+
+# Instantiate our LLM with them:
+llm = TransformersLLM(
+    model=model,
+    tokenizer=tokenizer,
+    task=TextGenerationTask(),
+    max_new_tokens=128,
+    temperature=0.3,
+    prompt_format="notus",
+)
diff --git a/docs/snippets/technical-reference/pipeline/argilla.py b/docs/snippets/technical-reference/pipeline/argilla.py
new file mode 100644
index 0000000000..a87097e68f
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/argilla.py
@@ -0,0 +1,20 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argilla as rg
+
+rg.init(api_key="", api_url="")  # fill in your Argilla API key and URL
+
+rg_dataset = pipe_dataset.to_argilla()  # `pipe_dataset` comes from pipe_2.py
+rg_dataset.push_to_argilla(name="preference-dataset", workspace="admin")
diff --git a/docs/snippets/technical-reference/pipeline/pipe_1.py b/docs/snippets/technical-reference/pipeline/pipe_1.py
new file mode 100644
index 0000000000..091d70641f
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/pipe_1.py
@@ -0,0 +1,37 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from distilabel.llm import InferenceEndpointsLLM
+from distilabel.pipeline import pipeline
+from distilabel.tasks import TextGenerationTask
+
+pipe = pipeline(
+    "preference",
+    "text-quality",
+    generator=InferenceEndpointsLLM(
+        endpoint_name=endpoint_name,  # reused from the Inference Endpoints snippet above
+        endpoint_namespace=endpoint_namespace,
+        token=token,
+        task=TextGenerationTask(),
+        max_new_tokens=512,
+        do_sample=True,
+        prompt_format="notus",
+    ),
+    # The kwargs below configure the default `gpt-3.5-turbo` labeller:
+    max_new_tokens=256,
+    num_threads=2,
+    openai_api_key=os.getenv("OPENAI_API_KEY"),
+    temperature=0.0,
+)
diff --git a/docs/snippets/technical-reference/pipeline/pipe_2.py b/docs/snippets/technical-reference/pipeline/pipe_2.py
new file mode 100644
index 0000000000..fb4efcaf03
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/pipe_2.py
@@ -0,0 +1,29 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
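+
+# NOTE: this snippet continues pipe_1.py; `pipe` below is the pipeline created there.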
+ +from datasets import load_dataset + +instruction_dataset = ( + load_dataset("HuggingFaceH4/instruction-dataset", split="test[:3]") + .remove_columns(["completion", "meta"]) + .rename_column("prompt", "input") +) + +pipe_dataset = pipe.generate( + instruction_dataset, + num_generations=2, + batch_size=1, + enable_checkpoints=True, + display_progress_bar=True, +) diff --git a/docs/snippets/technical-reference/pipeline/pipe_3.py b/docs/snippets/technical-reference/pipeline/pipe_3.py new file mode 100644 index 0000000000..a5be009d71 --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipe_3.py @@ -0,0 +1,41 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +print(pipe_dataset["input"][-1]) +# Create a 3 turn conversation between a customer and a grocery store clerk - that is, 3 per person. Then tell me what they talked about. + +print(pipe_dataset["generations"][-1][-1]) +# Customer: Hi there, I'm looking for some fresh berries. Do you have any raspberries or blueberries in stock? + +# Grocery Store Clerk: Yes, we have both raspberries and blueberries in stock today. Would you like me to grab some for you or can you find them yourself? + +# Customer: I'd like your help getting some berries. Can you also suggest which variety is sweeter? Raspberries or blueberries? + +# Grocery Store Clerk: Raspberries and blueberries both have distinct flavors. Raspberries are more tart and a little sweeter whereas blueberries tend to be a little sweeter and have a milder taste. It ultimately depends on your personal preference. Let me grab some of each for you to try at home and see which one you like better. + +# Customer: That sounds like a great plan. How often do you receive deliveries? Do you have some new varieties of berries arriving soon? + +# Grocery Store Clerk: We receive deliveries twice a week, on Wednesdays and Sundays. We also have a rotation of different varieties of berries throughout the season, so keep an eye out for new arrivals. Thanks for shopping with us, can I help you with anything else today? + +# Customer: No, that's all for now. I'm always happy to support your local store. + +# turn 1: berries, fresh produce availability, customer preference +# turn 2: product recommendations based on taste and personal preference, availability +# turn 3: store acknowledgment, shopping gratitude, loyalty and repeat business expectation. + +print(pipe_dataset["rating"][-1][-1]) +# 5.0 + +print(pipe_dataset["rationale"][-1][-1]) +# The text accurately follows the given instructions and provides a conversation between a customer and a grocery store clerk. The information provided is correct, informative, and aligned with the user's intent. There are no hallucinations or misleading details. 
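+
+# The labelled `pipe_dataset` is a `CustomDataset` (assumed to subclass
+# `datasets.Dataset`), so the standard `datasets` API applies. For example,
+# assuming you are authenticated against the Hugging Face Hub, it could be
+# shared under a hypothetical repo id with:
+# pipe_dataset.push_to_hub("my-org/preference-dataset")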
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_generator_1.py b/docs/snippets/technical-reference/pipeline/pipeline_generator_1.py
new file mode 100644
index 0000000000..82d1fba09f
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/pipeline_generator_1.py
@@ -0,0 +1,20 @@
+import os
+
+from distilabel.llm import InferenceEndpointsLLM
+from distilabel.pipeline import Pipeline
+from distilabel.tasks import TextGenerationTask
+
+# Prefer the environment variables, falling back to the example endpoint:
+endpoint_name = os.getenv("HF_INFERENCE_ENDPOINT_NAME") or "aws-notus-7b-v1-4052"
+endpoint_namespace = os.getenv("HF_NAMESPACE") or "argilla"
+
+pipe_generation = Pipeline(
+    generator=InferenceEndpointsLLM(
+        endpoint_name=endpoint_name,  # The name given to the deployed model
+        endpoint_namespace=endpoint_namespace,  # This usually corresponds to the organization, in this case "argilla"
+        token=os.getenv("HF_TOKEN"),  # hf_...
+        task=TextGenerationTask(),
+        max_new_tokens=512,
+        do_sample=True,
+        prompt_format="notus",
+    ),
+)
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_generator_2.py b/docs/snippets/technical-reference/pipeline/pipeline_generator_2.py
new file mode 100644
index 0000000000..5eeeaa8918
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/pipeline_generator_2.py
@@ -0,0 +1,20 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datasets import Dataset
+
+dataset = Dataset.from_dict(
+    {"input": ["Create an easy dinner recipe with few ingredients"]}
+)
+dataset_generated = pipe_generation.generate(dataset, num_generations=2)
diff --git a/docs/snippets/technical-reference/pipeline/pipeline_generator_3.py b/docs/snippets/technical-reference/pipeline/pipeline_generator_3.py
new file mode 100644
index 0000000000..a1168f97d8
--- /dev/null
+++ b/docs/snippets/technical-reference/pipeline/pipeline_generator_3.py
@@ -0,0 +1,53 @@
+# Copyright 2023-present, Argilla, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
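+
+# NOTE: `dataset_generated` comes from pipeline_generator_2.py, where
+# `pipe_generation.generate(...)` was called with `num_generations=2`.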
+ +print(dataset_generated) +# Dataset({ +# features: ['input', 'generation_model', 'generation_prompt', 'raw_generation_responses', 'generations'], +# num_rows: 1 +# }) + +print(dataset_generated[0]["generations"][0]) +# Here's a simple and delicious dinner recipe with only a few ingredients: + +# Garlic Butter Chicken with Roasted Vegetables + +# Ingredients: +# - 4 boneless, skinless chicken breasts +# - 4 tablespoons butter +# - 4 cloves garlic, minced +# - 1 teaspoon dried oregano +# - 1/2 teaspoon salt +# - 1/4 teaspoon black pepper +# - 1 zucchini, sliced +# - 1 red bell pepper, sliced +# - 1 cup cherry tomatoes + +# Instructions: + +# 1. Preheat oven to 400°F (200°C). + +# 2. Melt butter in a small saucepan over low heat. Add minced garlic and heat until fragrant, about 1-2 minutes. + +# 3. Place chicken breasts in a baking dish and brush garlic butter over each one. + +# 4. Sprinkle oregano, salt, and black pepper over the chicken. + +# 5. In a separate baking dish, add sliced zucchini, red bell pepper, and cherry tomatoes. Brush with remaining garlic butter. + +# 6. Roast the chicken and vegetables in the preheated oven for 25-30 minutes or until cooked through and the vegetables are tender and lightly browned. + +# 7. Transfer the chicken to plates and serve with the roasted vegetables alongside. Enjoy! + +# This recipe requires simple ingredients and is easy to prepare, making it perfect for a quick, satisfying dinner. The garlic butter adds maximum flavor, while the roasted vegetables complement the chicken beautifully, providing additional nutrition and texture. With minimal effort, you can have a delicious and balanced meal on the table in no time. diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_1.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_1.py new file mode 100644 index 0000000000..edc6b0bf2a --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_1.py @@ -0,0 +1,16 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.pipeline import Pipeline +from distilabel.tasks import UltraFeedbackTask + +pipe_labeller = Pipeline( + labeller=OpenAILLM( + model="gpt-4", + task=UltraFeedbackTask.for_instruction_following(), + max_new_tokens=256, + num_threads=8, + openai_api_key=os.getenv("OPENAI_API_KEY"), + temperature=0.3, + ), +) diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_2.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_2.py new file mode 100644 index 0000000000..2a8f0651cf --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_2.py @@ -0,0 +1,32 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
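+
+# NOTE: `pipe_labeller`, used at the end of this snippet, is the labelling
+# pipeline defined in pipeline_labeller_1.py.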
+ +from datasets import Dataset + +dataset_test = Dataset.from_dict( + { + "input": [ + "Describe the capital of Spain in 25 words.", + "Design a conversation between a customer and a customer service agent.", + ], + "generations": [ + ["Santo Domingo is the capital of Dominican Republic"], + [ + "Customer: Hello, I'm having trouble with my purchase.\n\nCustomer Service Agent: I'm sorry to hear that. Could you please tell me more about the issue you are facing?\n\nCustomer: Yes, I ordered a pair of shoes from your company a week ago, but I haven't received them yet.\n\nCustomer Service Agent: I apologize for the inconvenience. Could you please provide me with your order number and full name so I can look into this for you?\n\nCustomer: Sure, my name is John Doe and my order number is ABCD1234.\n\nCustomer Service Agent: Thank you, John. I have checked on your order and it appears that it is still being processed. It should be shipped out within the next 24 hours.\n\nCustomer: That's good to hear, but can you also tell me the expected delivery time?\n\nCustomer Service Agent: Absolutely, based on your location, the estimated delivery time is 3-5 business days after shipping. You will receive a tracking number via email once the item is shipped, which will provide real-time updates on your package.\n\nCustomer: Thanks for the information. One more thing, what is your return policy if the shoes don't fit?\n\nCustomer Service Agent: Our company offers a 30-day return policy. If you are not satisfied with the product or if it doesn't fit, you can return it for a full refund or an exchange within 30 days of delivery. Please keep in mind that the product must be in its original packaging and in the same condition as when you received it.\n\nCustomer: Okay, that's good to know. Thank you for your help.\n\nCustomer Service Agent: You're welcome, John. I'm glad I could assist you. If you have any further questions or concerns, please don't hesitate to reach out to us. Have a great day!" + ], + ], + } +) + +ds_labelled = pipe_labeller.generate(dataset_test) diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_3.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_3.py new file mode 100644 index 0000000000..0c4195c068 --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_3.py @@ -0,0 +1,23 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ds_labelled.select_columns(["input", "generations", "rating", "rationale"])[0] +# { +# "input": "Describe the capital of Spain in 25 words.", +# "generations": ["Santo Domingo is the capital of Dominican Republic"], +# "rating": [1.0], +# "rationale": [ +# "The text is irrelevant to the instruction. It describes the capital of the Dominican Republic instead of Spain." 
+# ], +# } diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_1.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_1.py new file mode 100644 index 0000000000..7508c8d575 --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_1.py @@ -0,0 +1,41 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from distilabel.llm import InferenceEndpointsLLM, OpenAILLM +from distilabel.pipeline import Pipeline +from distilabel.tasks import TextGenerationTask, UltraFeedbackTask + +pipe_full = Pipeline( + generator=InferenceEndpointsLLM( + endpoint_name=endpoint_name, + endpoint_namespace=endpoint_namespace, + token=token, + task=TextGenerationTask( + system_prompt="You are an expert writer of XKCD, a webcomic of romance, sarcasm, math, and language." + ), + max_new_tokens=512, + do_sample=True, + prompt_format="notus", + ), + labeller=OpenAILLM( + model="gpt-3.5-turbo", + task=UltraFeedbackTask.for_instruction_following(), + max_new_tokens=256, + num_threads=4, + openai_api_key=os.getenv("OPENAI_API_KEY"), + temperature=0.3, + ), +) diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_2.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_2.py new file mode 100644 index 0000000000..a1bad54079 --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_2.py @@ -0,0 +1,20 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datasets import Dataset + +xkcd_instructions = Dataset.from_dict( + {"input": ["Could you imagine an interview process going sideways?"]} +) +ds_xkcd = pipe_full.generate(xkcd_instructions, num_generations=3) diff --git a/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_3.py b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_3.py new file mode 100644 index 0000000000..fa0fa1863d --- /dev/null +++ b/docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_3.py @@ -0,0 +1,59 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +print(ds_xkcd[1]["generations"][0]) +print("-----" * 5) +print("RATING: ", ds_xkcd[1]["rating"][0]) +print("RATIONALE: ", ds_xkcd[1]["rationale"][0]) + +# Yes, absolutely! Here's a fictional interview scenario turned into an XKCD-style comic: + +# (Interviewee meets with an unsmiling interviewer) + +# Interviewer: Good morning! Have a seat. Tell me about your experience working with teams. + +# Interviewee: Well, I've worked in large teams on group projects before. It could be challenging, but we always managed to pull through. + +# (Smugly) Interviewer: Challenging, huh? (tapping pen on desk) And how did you manage to overcome these challenges? + +# Interviewee: (confidently) Communication was key. I made sure to stay in touch with the team and keep everyone updated on our progress. + +# Interviewer: Communication. Hm. And what if communication failed? + +# Interviewee: (thrown off balance) Well, I mean...there was one time when we couldn't connect via video call. But we picked up the phone, and we all understood what needed to be done. + +# Interviewer: But what if the communication on the technical level failed, say, like a computer system with a software glitch? + +# Interviewee: (feeling the pressure) That's never happened to me before, but if it did, we would have to troubleshoot and find a workaround, right? + +# Interviewer: (smirking) Oh, but finding a workaround could mean delegating responsibilities among the team, which requires communication. It's a vicious cycle! + +# (Interviewee visibly uncomfortable) + +# Interviewer: And what if there was a communication breakdown among the team members themselves? + +# Interviewee: (unsure) I think we would try to sort it out as soon as possible to avoid any further problems. + +# Interviewer: (sarcastically) Yes, avoiding further problems is critical. Don't want to let those deadlines slip, do we? + +# (Interviewer types frantically on their computer keyboard) + +# Interviewer: (softly but wordily) Note to self: Avoid this candidate for team projects. + +# (The interviewer returns his attention back to the interviewee) + +# Interviewer: Well, moving on... +# ------------------------- +# RATING: 4.0 +# RATIONALE: The text provides a fictional interview scenario that aligns with the task goal of imagining an interview process going sideways. It includes dialogue between an interviewer and interviewee, showcasing a breakdown in communication and the interviewer's sarcastic and dismissive attitude towards the interviewee's responses. diff --git a/docs/snippets/technical-reference/tasks/generic_llama2_textgeneration.py b/docs/snippets/technical-reference/tasks/generic_llama2_textgeneration.py new file mode 100644 index 0000000000..49ab3ca205 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/generic_llama2_textgeneration.py @@ -0,0 +1,9 @@ +from distilabel.llm import TransformersLLM +from distilabel.tasks import Llama2TextGenerationTask + +# This snippet uses `TransformersLLM`, but is the same for every other `LLM`. 
+generator = TransformersLLM( + model=..., + tokenizer=..., + task=Llama2TextGenerationTask(), +) diff --git a/docs/snippets/technical-reference/tasks/generic_openai_self_instruct.py b/docs/snippets/technical-reference/tasks/generic_openai_self_instruct.py new file mode 100644 index 0000000000..c4f9f3987c --- /dev/null +++ b/docs/snippets/technical-reference/tasks/generic_openai_self_instruct.py @@ -0,0 +1,13 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import SelfInstructTask + +generator = OpenAILLM( + task=SelfInstructTask( + system_prompt="You are a question-answering assistant for...", + application_description="AI assistant", + num_instructions=3, + ), + openai_api_key=os.getenv("OPENAI_API_KEY"), +) diff --git a/docs/snippets/technical-reference/tasks/generic_openai_textgeneration.py b/docs/snippets/technical-reference/tasks/generic_openai_textgeneration.py new file mode 100644 index 0000000000..7f10affc03 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/generic_openai_textgeneration.py @@ -0,0 +1,8 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import OpenAITextGenerationTask + +generator = OpenAILLM( + task=OpenAITextGenerationTask(), openai_api_key=os.getenv("OPENAI_API_KEY") +) diff --git a/docs/snippets/technical-reference/tasks/generic_transformersllm.py b/docs/snippets/technical-reference/tasks/generic_transformersllm.py new file mode 100644 index 0000000000..a696a5b10f --- /dev/null +++ b/docs/snippets/technical-reference/tasks/generic_transformersllm.py @@ -0,0 +1,9 @@ +from distilabel.llm import TransformersLLM +from distilabel.tasks import TextGenerationTask + +# This snippet uses `TransformersLLM`, but is the same for every other `LLM`. +generator = TransformersLLM( + model=..., + tokenizer=..., + task=TextGenerationTask(), +) diff --git a/docs/snippets/technical-reference/tasks/openai_for_helpfulness.py b/docs/snippets/technical-reference/tasks/openai_for_helpfulness.py new file mode 100644 index 0000000000..d30733bac7 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_for_helpfulness.py @@ -0,0 +1,8 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraFeedbackTask + +labeller = OpenAILLM( + task=UltraFeedbackTask.for_helpfulness(), openai_api_key=os.getenv("OPENAI_API_KEY") +) diff --git a/docs/snippets/technical-reference/tasks/openai_for_honesty.py b/docs/snippets/technical-reference/tasks/openai_for_honesty.py new file mode 100644 index 0000000000..004c8362a2 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_for_honesty.py @@ -0,0 +1,8 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraFeedbackTask + +labeller = OpenAILLM( + task=UltraFeedbackTask.for_honesty(), openai_api_key=os.getenv("OPENAI_API_KEY") +) diff --git a/docs/snippets/technical-reference/tasks/openai_for_instruction_following.py b/docs/snippets/technical-reference/tasks/openai_for_instruction_following.py new file mode 100644 index 0000000000..bf01b134ba --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_for_instruction_following.py @@ -0,0 +1,9 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraFeedbackTask + +labeller = OpenAILLM( + task=UltraFeedbackTask.for_instruction_following(), + openai_api_key=os.getenv("OPENAI_API_KEY"), +) diff --git a/docs/snippets/technical-reference/tasks/openai_for_text_quality.py 
b/docs/snippets/technical-reference/tasks/openai_for_text_quality.py new file mode 100644 index 0000000000..bd893dd779 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_for_text_quality.py @@ -0,0 +1,9 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraFeedbackTask + +labeller = OpenAILLM( + task=UltraFeedbackTask.for_text_quality(), + openai_api_key=os.getenv("OPENAI_API_KEY"), +) diff --git a/docs/snippets/technical-reference/tasks/openai_for_truthfulness.py b/docs/snippets/technical-reference/tasks/openai_for_truthfulness.py new file mode 100644 index 0000000000..500e8701aa --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_for_truthfulness.py @@ -0,0 +1,9 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraFeedbackTask + +labeller = OpenAILLM( + task=UltraFeedbackTask.for_truthfulness(), + openai_api_key=os.getenv("OPENAI_API_KEY"), +) diff --git a/docs/snippets/technical-reference/tasks/openai_judgelm.py b/docs/snippets/technical-reference/tasks/openai_judgelm.py new file mode 100644 index 0000000000..6fc04ff2d0 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_judgelm.py @@ -0,0 +1,6 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import JudgeLMTask + +labeller = OpenAILLM(task=JudgeLMTask(), openai_api_key=os.getenv("OPENAI_API_KEY")) diff --git a/docs/snippets/technical-reference/tasks/openai_ultrajudge.py b/docs/snippets/technical-reference/tasks/openai_ultrajudge.py new file mode 100644 index 0000000000..250fefa19e --- /dev/null +++ b/docs/snippets/technical-reference/tasks/openai_ultrajudge.py @@ -0,0 +1,6 @@ +import os + +from distilabel.llm import OpenAILLM +from distilabel.tasks import UltraJudgeTask + +labeller = OpenAILLM(task=UltraJudgeTask(), openai_api_key=os.getenv("OPENAI_API_KEY")) diff --git a/docs/snippets/technical-reference/tasks/ultrafeedback.py b/docs/snippets/technical-reference/tasks/ultrafeedback.py new file mode 100644 index 0000000000..cc8b81e4e9 --- /dev/null +++ b/docs/snippets/technical-reference/tasks/ultrafeedback.py @@ -0,0 +1,29 @@ +from textwrap import dedent + +from distilabel.tasks.preference.ultrafeedback import Rating, UltraFeedbackTask + +task_description = dedent( + """ + # General Text Quality Assessment + Evaluate the model's outputs based on various criteria: + 1. **Correctness & Informativeness**: Does the output provide accurate and helpful information? + 2. **Honesty & Uncertainty**: How confidently does the model convey its information, and does it express uncertainty appropriately? + 3. **Truthfulness & Hallucination**: Does the model introduce misleading or fabricated details? + 4. **Instruction Following**: Does the model's output align with given instructions and the user's intent? + Your role is to provide a holistic assessment considering all the above factors. 
+
+    **Scoring**: Rate outputs 1 to 3 based on the overall quality, considering all aspects:
+    """
+)
+
+ratings = [
+    Rating(value=1, description="Low Quality"),
+    Rating(value=2, description="Moderate Quality"),
+    Rating(value=3, description="Good Quality"),
+]
+
+ultrafeedback_task = UltraFeedbackTask(
+    system_prompt="Your role is to evaluate text quality based on given criteria",
+    task_description=task_description,
+    ratings=ratings,
+)
diff --git a/docs/snippets/technical-reference/tasks/ultrajudge.py b/docs/snippets/technical-reference/tasks/ultrajudge.py
new file mode 100644
index 0000000000..52c8f243e7
--- /dev/null
+++ b/docs/snippets/technical-reference/tasks/ultrajudge.py
@@ -0,0 +1,13 @@
+from distilabel.tasks import UltraJudgeTask
+
+# To see the complete system_prompt and task_description, please take a look at the UltraJudgeTask definition
+ultrajudge_task = UltraJudgeTask(
+    system_prompt="You are an evaluator tasked with assessing AI assistants' responses from the perspective of typical user preferences...",
+    task_description="Your task is to rigorously evaluate the performance of...",
+    areas=[
+        "Practical Accuracy",
+        "Clarity & Transparency",
+        "Authenticity & Reliability",
+        "Compliance with Intent",
+    ],
+)
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
index c14316c8d2..e32447d5d9 100644
--- a/docs/stylesheets/extra.css
+++ b/docs/stylesheets/extra.css
@@ -2,4 +2,14 @@
     --md-primary-fg-color: #FF675F;
     --md-primary-fg-color--light: #FF675F;
     --md-primary-fg-color--dark: #FF675F;
+}
+[data-md-color-scheme="default"] {
+    --md-primary-fg-color: #000000;
+    --md-typeset-a-color: #FF675F;
+    --md-accent-fg-color: #F7A399;
+}
+[data-md-color-scheme="slate"] {
+    --md-primary-fg-color: #000000;
+    --md-typeset-a-color: #F7A399;
+    --md-accent-fg-color: #FF675F;
+}
\ No newline at end of file
diff --git a/docs/technical-reference/index.md b/docs/technical-reference/index.md
new file mode 100644
index 0000000000..1a473425c8
--- /dev/null
+++ b/docs/technical-reference/index.md
@@ -0,0 +1,5 @@
+# Technical reference
+
+Explore `distilabel`'s technical references for an understanding of its components and their interactions, or directly access the API Reference for specific details.
+
+If you are not familiar with the different components, consider taking a look at the [concepts](../concepts.md) first.
\ No newline at end of file
diff --git a/docs/technical-reference/llms.md b/docs/technical-reference/llms.md
new file mode 100644
index 0000000000..dcc234c8ce
--- /dev/null
+++ b/docs/technical-reference/llms.md
@@ -0,0 +1,101 @@
+# LLMs
+
+## LLM
+
+The [`LLM`][distilabel.llm.base.LLM] class encapsulates the functionality for interacting with a language model.
+
+It distinguishes between *task* specifications and configurable parameters that influence the LLM's behavior.
+
+For illustration purposes, we employ the [`TextGenerationTask`][distilabel.tasks.text_generation.base.TextGenerationTask] in this section and refer you to the dedicated [`Tasks`](../technical-reference/tasks.md) section for comprehensive details.
+
+LLM classes share several general parameters and define implementation-specific ones. Let's first explain the general parameters and the `generate` method, and then the specifics of each class.
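+
+As a quick preview before diving into the details, here is a minimal sketch rather than a runnable script, with `...` standing in for a real model and tokenizer, and reusing the output fields shown in the snippets below:
+
+```python
+from distilabel.llm import TransformersLLM
+from distilabel.tasks import TextGenerationTask
+
+# The task specifies *what* to generate; the remaining parameters control *how*:
+llm = TransformersLLM(
+    model=...,
+    tokenizer=...,
+    task=TextGenerationTask(),
+    max_new_tokens=128,
+    temperature=0.3,
+)
+
+result = llm.generate([{"input": "What is the capital of Spain?"}])
+print(result[0][0]["parsed_output"]["generations"])
+```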
+
+### General Parameters
+
+Let's briefly introduce the general parameters you may find[^1]:
+
+[^1]:
+    You can take a look at this blog post from [cohere](https://txt.cohere.com/llm-parameters-best-outputs-language-ai/) for a thorough explanation of the different parameters.
+
+- `max_new_tokens`:
+
+    This parameter controls the maximum number of tokens the LLM is allowed to generate.
+
+- `temperature`:
+
+    A parameter associated with the creativity of the model: values close to 0 make the model more deterministic, while higher values make it more "creative".
+
+- `top_k` and `top_p`:
+
+    `top_k` limits the sampling of the next token to the *k* most probable tokens, while `top_p` limits it to the smallest set of tokens whose cumulative probability reaches *p*.
+
+- `frequency_penalty` and `presence_penalty`:
+
+    The frequency penalty penalizes tokens that have already appeared in the generated text, in proportion to how often they occur, reducing the chance of them appearing again, while the `presence_penalty` penalizes repetition regardless of the frequency.
+
+- `prompt_format` and `prompt_formatting_fn`:
+
+    These two parameters allow tweaking the prompts given to our models: `prompt_format` directs the `LLM` to format the prompt according to one of the predefined formats, while `prompt_formatting_fn` lets you pass a function to be applied to the prompt before generation, for extra control over what is fed to the model.
+
+### Generate method
+
+Once you create an `LLM`, you use the `generate` method to interact with it. This method requires the inputs for text generation and the number of desired generations. The output comes as lists containing `LLMOutput`[^2] objects, which act as a general container for the LLM's results.
+
+[^2]:
+    It can also return lists of *Futures* containing the lists of these `LLMOutputs`, if we deal with an asynchronous or thread-based API.
+
+Let's see the different LLMs that are implemented in `distilabel` (we can think of them in terms of the engine that generates the text for us):
+
+## OpenAI
+
+These models are often the default choice for the most demanding tasks.
+
+For the API reference visit [OpenAILLM][distilabel.llm.openai.OpenAILLM].
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/openai_generate.py"
+```
+
+## Llama.cpp
+
+This option enables local execution of LLMs. Use it when you have access to the quantized weights of the model you want to interact with.
+
+Let's see an example using [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1). First, you can download the weights from the following [link](https://huggingface.co/TheBloke/notus-7B-v1-GGUF):
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/llamacpp_generate.py"
+```
+
+For the API reference visit [LlamaCppLLM][distilabel.llm.llama_cpp.LlamaCppLLM].
+
+## vLLM
+
+For the API reference visit [vLLM][distilabel.llm.vllm.vLLM].
+
+## HuggingFace LLMs
+
+This section explains two different ways to use Hugging Face models:
+
+### Transformers
+
+This is the option to use a model available on the Hugging Face Hub: load the model and tokenizer in the standard manner, as you would locally, and proceed to instantiate the class.
+
+For the API reference visit [TransformersLLM][distilabel.llm.huggingface.transformers.TransformersLLM].
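+
+If you have a GPU available, you can also load the model in half precision to speed up generation; this is standard `transformers` usage rather than anything `distilabel`-specific (the `device_map="auto"` option additionally requires the `accelerate` package):
+
+```python
+import torch
+from transformers import AutoModelForCausalLM
+
+# Load the weights in float16 and place them automatically on the available devices:
+model = AutoModelForCausalLM.from_pretrained(
+    "argilla/notus-7b-v1", torch_dtype=torch.float16, device_map="auto"
+)
+```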
+
+Let's see an example using [notus-7b-v1](https://huggingface.co/argilla/notus-7b-v1):
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/transformers_generate.py"
+```
+
+### Inference Endpoints
+
+Hugging Face provides a streamlined approach for deploying models through [inference endpoints](https://huggingface.co/inference-endpoints) on their infrastructure. Opt for this solution if your model is hosted on the Hugging Face Hub.
+
+For the API reference visit [InferenceEndpointsLLM][distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM].
+
+Let's see how to interact with these LLMs:
+
+```python
+--8<-- "docs/snippets/technical-reference/llm/inference_endpoint_generate.py"
+```
diff --git a/docs/technical-reference/pipeline.md b/docs/technical-reference/pipeline.md
new file mode 100644
index 0000000000..436fc3e4d6
--- /dev/null
+++ b/docs/technical-reference/pipeline.md
@@ -0,0 +1,107 @@
+# Pipelines
+
+This section will detail the [`Pipeline`][distilabel.pipeline.Pipeline], providing guidance on creating and using pipelines.
+
+## Pipeline
+
+The [Pipeline][distilabel.pipeline.Pipeline] class is a central component in `distilabel`, responsible for crafting datasets. It manages the generation of datasets and oversees the interaction between the generator and labeller `LLMs`.
+
+You create an instance of the [`Pipeline`][distilabel.pipeline.Pipeline] by providing a *generator* and an optional *labeller* [LLM][distilabel.llm.base.LLM]. Interactions with it are facilitated through its `generate` method. This method requires a [`dataset`](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset), specifies the *num_generations* to determine the number of examples to be created, and includes additional parameters for controlling the *batch_size* and managing the generation process.
+
+Let's start with a Pipeline with a single `LLM` as a generator.
+
+### Generator
+
+We will create a [`Pipeline`][distilabel.pipeline.Pipeline] that will use [Notus](https://huggingface.co/argilla/notus-7b-v1) from a Hugging Face [Inference Endpoint][distilabel.llm.InferenceEndpointsLLM]. To do so, we need to create a [TextGenerationTask][distilabel.tasks.TextGenerationTask] and specify the format we want to use for our `Prompt`, in this case *notus*, which is the same format used by *zephyr*.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_generator_1.py"
+```
+
+We've set up our pipeline using a specialized [`TextGenerationTask`][distilabel.tasks.text_generation.base.TextGenerationTask] (refer to the [tasks section](./tasks.md) for more task details), and an [InferenceEndpointsLLM][distilabel.llm.huggingface.inference_endpoints.InferenceEndpointsLLM] configured for [`notus-7b-v1`](https://huggingface.co/argilla/notus-7b-v1), although any of the available `LLMs` will work.
+
+To use the [Pipeline][distilabel.pipeline.Pipeline] for dataset generation, we call its `generate` method, providing the input dataset and the desired number of generations. In this example, we've prepared a `Dataset` with a single row to illustrate the process, and we'll trigger 2 generations from it:
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_generator_2.py"
+```
+
+Now, let's examine the dataset that was generated.
It's a [`CustomDataset`][distilabel.dataset.CustomDataset], equipped with additional features for seamless interaction with [`Argilla`](https://github.com/argilla-io/argilla).
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_generator_3.py"
+```
+
+### Labeller
+
+Next, we move on to labelling a dataset. Just as before, we need an `LLM` for our `Pipeline`. In this case we will use [`OpenAILLM`][distilabel.llm.openai.OpenAILLM] with `gpt-4`, and a `PreferenceTask`, [UltraFeedbackTask][distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask] for instruction following.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_1.py"
+```
+
+For this example dataset, we've extracted 2 sample rows from the [UltraFeedback binarized dataset](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences), formatted as expected by the default `LLM` and `Task`.
+
+We've selected two distinct examples, one correctly labeled and the other incorrectly labeled in the original dataset. In this instance, the `dataset` being generated includes two columns: the *input*, as seen in the generator, and a *generations* column containing the model's responses.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_2.py"
+```
+
+Let's select the relevant columns from the labelled dataset, and take a look at the first record. This allows us to observe the *rating* and the accompanying *rationale* that provides an explanation.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_3.py"
+```
+
+### Generator and Labeller
+
+In the final scenario, we have a [`Pipeline`][distilabel.pipeline.Pipeline] utilizing both a *generator* and a *labeller* `LLM`. Once more, we'll employ the [Inference Endpoint][distilabel.llm.InferenceEndpointsLLM] with `notus-7b-v1` for the *generator*, using a different *system prompt* this time. As for the labeller, we'll use `gpt-3.5-turbo`, which will label the examples for *instruction following*.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_1.py"
+```
+
+For this example, we'll set up a pipeline to generate and label a dataset of short stories inspired by [XKCD](https://xkcd.com/). To do this, we'll define a custom *system_prompt* for the `TextGenerationTask`. The dataset will follow the same format we used for the generator scenario, featuring an *input* column with the examples, in this case, just one.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_2.py"
+```
+
+We will now take a look at one of the *generations*, along with the *rating* and *rationale* given by our *labeller* `LLM`:
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipeline_labeller_generator_3.py"
+```
+
+## pipeline
+
+Considering recurring patterns in dataset creation, we can facilitate the process by utilizing the [`Pipeline`][distilabel.pipeline.Pipeline]. This is made simpler through the [`pipeline`][distilabel.pipeline.pipeline] function, which provides the necessary parameters for creating a `Pipeline`.
+
+In the code snippet below, we use the [`pipeline`][distilabel.pipeline.pipeline] function to craft a `pipeline` tailored for a *preference task*, specifically focusing on *text-quality* as the *subtask*. If we don't initially provide a *labeller* [`LLM`][distilabel.llm.base.LLM], we can specify the subtask we want our `pipeline` to address.
By default, this corresponds to [`UltraFeedbackTask`][distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask]. It's mandatory to specify the generator of our choice; however, the labeller defaults to `gpt-3.5-turbo`. Optional parameters for the [OpenAILLM][distilabel.llm.openai.OpenAILLM] labeller can also be passed as keyword arguments.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipe_1.py"
+```
+
+For the dataset, we'll begin with three rows from [HuggingFaceH4/instruction-dataset](https://huggingface.co/datasets/HuggingFaceH4/instruction-dataset). We'll request two generations with checkpoints enabled to safeguard the data in the event of any failures, which is the default behavior.
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipe_2.py"
+```
+
+Finally, let's see one of the examples from the dataset:
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/pipe_3.py"
+```
+
+The API reference can be found here: [pipeline][distilabel.pipeline.pipeline]
+
+## Argilla integration
+
+The [CustomDataset][distilabel.dataset.CustomDataset] generated entirely by AI models may require some additional human processing. To facilitate human feedback, the dataset can be uploaded to [`Argilla`](https://github.com/argilla-io/argilla). This process involves logging into an [`Argilla`](https://docs.argilla.io/en/latest/getting_started/cheatsheet.html#connect-to-argilla) instance, converting the dataset to the required format using `CustomDataset.to_argilla()`, and subsequently calling `push_to_argilla` on the resulting dataset:
+
+```python
+--8<-- "docs/snippets/technical-reference/pipeline/argilla.py"
+```
diff --git a/docs/technical-reference/tasks.md b/docs/technical-reference/tasks.md
new file mode 100644
index 0000000000..d449cf3b72
--- /dev/null
+++ b/docs/technical-reference/tasks.md
@@ -0,0 +1,159 @@
+# Tasks
+
+The `Task` class takes charge of setting how the LLM behaves, deciding whether it acts as a *generator* or a *labeller*. To accomplish this, the `Task` class creates a prompt using a template that will be sent to the [`LLM`](../technical-reference/llms.md). It specifies the necessary input arguments for generating the prompt and identifies the output arguments to be extracted from the `LLM` response. The `Task` class yields a `Prompt` that can generate a string in the format needed, depending on the specific `LLM` used.
+
+`distilabel` distinguishes between two primary categories of tasks: those focused on text generation and those centered around labelling. These `Task` classes define the LLM's behavior, be it the creation of textual content or the assignment of labels to text, each with precise guidelines tailored to their respective functionalities. Users can seamlessly leverage these distinct task types to tailor the LLM's behavior according to their specific application needs.
+
+Let's see the different tasks in `distilabel`:
+
+## Text Generation
+
+This set of classes is designed to steer an `LLM` in generating text with specific guidelines. They provide a structured approach to instruct the LLM on generating content in a manner tailored to predefined criteria.
+
+The following tasks for text generation are implemented:
+
+### TextGenerationTask
+
+This is the base class for *text generation*. It includes the following fields for guiding the generation process (a short example follows this list):
+
+- `system_prompt`: the initial instruction or query given to the LLM, guiding it on what kind of information or output is expected;
+- a list of `principles` to inject into the `system_prompt`, which by default correspond to those defined in the UltraFeedback paper[^1];
+- and lastly, a distribution for these principles, so the `LLM` can be directed towards the different principles with a more customized behavior.
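+
+For instance, here is a minimal sketch, mirroring how the pipeline snippets customize tasks, in which only the `system_prompt` is overridden and the principles keep their defaults:
+
+```python
+from distilabel.tasks import TextGenerationTask
+
+# A text generation task with a custom system prompt:
+task = TextGenerationTask(
+    system_prompt="You are a helpful assistant that answers concisely."
+)
+```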
+
+[^1]:
+    The principles can be found [here][distilabel.tasks.text_generation.principles] in the codebase. More information on the *Principle Sampling* can be found in the [UltraFeedback repository](https://github.com/OpenBMB/UltraFeedback#principle-sampling).
+
+The following methods define a task:
+
+- `generate_prompt`: this method is used by the `LLM` during the creation of the prompts that will be passed to the different models.
+- `parse_output`: after the `LLM` has generated the content, this method is called on the raw outputs to extract the relevant content.
+- `input_args_names` and `output_args_names`: these methods are used by the [`Pipeline`](../technical-reference/pipeline.md) to process the datasets. The first one defines the columns that will be extracted from the dataset to build the prompt, either for an `LLM` that acts as a generator or labeller alone, or the columns that should be placed in the dataset to be processed by the *labeller* `LLM`, in the case of a `Pipeline` that has both a *generator* and a *labeller*. The second one is in charge of inserting the defined fields as columns of the generated dataset.
+
+After defining a task, the only action required is to pass it to the corresponding `LLM`. All the intricate processes are then handled internally:
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/generic_transformersllm.py"
+```
+
+For the API reference visit [TextGenerationTask][distilabel.tasks.text_generation.base.TextGenerationTask].
+
+### Llama2TextGenerationTask
+
+This class inherits from the `TextGenerationTask` and is specially prepared to deal with prompts in the format of the *Llama2* model, so it should be the go-to task for `LLMs` intended for text generation that were trained using this prompt format. The specific prompt formats can be found in the source code of the [Prompt][distilabel.tasks.prompt.Prompt] class.
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/generic_llama2_textgeneration.py"
+```
+
+For the API reference visit [Llama2TextGenerationTask][distilabel.tasks.text_generation.llama.Llama2TextGenerationTask].
+
+### OpenAITextGenerationTask
+
+The OpenAI task for text generation is similar to the `Llama2TextGenerationTask`, but with the specific prompt format expected by the *chat completion* task from OpenAI.
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/generic_openai_textgeneration.py"
+```
+
+For the API reference visit [OpenAITextGenerationTask][distilabel.tasks.text_generation.openai.OpenAITextGenerationTask].
+
+### SelfInstructTask
+
+This task is specially designed to build prompts following the Self-Instruct paper: [SELF-INSTRUCT: Aligning Language Models with Self-Generated Instructions](https://arxiv.org/pdf/2212.10560.pdf).
+
+From the original [repository](https://github.com/yizhongw/self-instruct/tree/main#how-self-instruct-works): *The Self-Instruct process is an iterative bootstrapping algorithm that starts with a seed set of manually-written instructions and uses them to prompt the language model to generate new instructions and corresponding input-output instances*, so this `Task` is especially interesting for generating new datasets from a set of predefined topics.
+
+## Labelling
+
+Instead of generating text, you can instruct the `LLM` to label datasets. The existing tasks are designed specifically for creating `Preference` datasets.
+
+### Preference
+
+Preference datasets for Language Models (LLMs) are sets of information that show how people rank or prefer one thing over another in a straightforward and clear manner. These datasets help train language models to understand and generate content that aligns with user preferences, enhancing the model's ability to produce contextually relevant and preferred outputs.
+
+Unlike the `TextGenerationTask`, the `PreferenceTask` is not intended for direct use. It implements the default methods `input_args_names` and `output_args_names`, but `generate_prompt` and `parse_output` are specific to each `PreferenceTask`. Examining the `output_args_names` reveals that the generation will encompass both the rating and the rationale that influenced that rating.
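+
+For intuition, the parsed output of a preference task has roughly the following shape; the values below are invented for illustration, with one rating and one rationale per generation being compared:
+
+```python
+# Illustrative only: what a labeller might produce for two candidate generations
+parsed_output = {
+    "rating": [4.0, 2.0],
+    "rationale": [
+        "Directly answers the question and follows the instruction.",
+        "Partially correct, but omits the requested explanation.",
+    ],
+}
+```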
+
+#### UltraFeedbackTask
+
+This task is specifically designed to build prompts following the format defined in the ["UltraFeedback: Boosting Language Models With High Quality Feedback"](https://arxiv.org/pdf/2310.01377.pdf) paper.
+
+From the original [repository](https://github.com/OpenBMB/UltraFeedback): *To collect high-quality preference and textual feedback, we design a fine-grained annotation instruction, which contains 4 different aspects, namely instruction-following, truthfulness, honesty and helpfulness*. This `Task` is designed to label datasets following the different aspects defined for the UltraFeedback dataset creation.
+
+The following snippet can be used as a simplified UltraFeedback task, for which we define 3 different ratings; take into account that the predefined versions are intended to be used out of the box:
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/ultrafeedback.py"
+```
+
+=== "Text Quality"
+
+    The following example uses an `LLM` to examine the data for text quality criteria, which include the different criteria from UltraFeedback (Correctness & Informativeness, Honesty & Uncertainty, Truthfulness & Hallucination and Instruction Following):
+
+    ```python
+    --8<-- "docs/snippets/technical-reference/tasks/openai_for_text_quality.py"
+    ```
+
+=== "Helpfulness"
+
+    The following example creates an UltraFeedback task to emphasize helpfulness, that is, the overall quality and correctness of the output:
+
+    ```python
+    --8<-- "docs/snippets/technical-reference/tasks/openai_for_helpfulness.py"
+    ```
+
+=== "Truthfulness"
+
+    The following example creates an UltraFeedback task to emphasize truthfulness and hallucination assessment:
+
+    ```python
+    --8<-- "docs/snippets/technical-reference/tasks/openai_for_truthfulness.py"
+    ```
+
+=== "Honesty"
+
+    The following example creates an UltraFeedback task to emphasize honesty and uncertainty expression assessment:
+
+    ```python
+    --8<-- "docs/snippets/technical-reference/tasks/openai_for_honesty.py"
+    ```
+
+=== "Instruction Following"
+
+    The following example creates an UltraFeedback task to emphasize the evaluation of alignment between output and intent:
+
+    ```python
+    --8<-- "docs/snippets/technical-reference/tasks/openai_for_instruction_following.py"
+    ```
+
+For the API reference visit [UltraFeedbackTask][distilabel.tasks.preference.ultrafeedback.UltraFeedbackTask].
+
+#### JudgeLMTask
+
+This task is specially designed to build prompts following the JudgeLM paper: [JudgeLM: Fine-tuned Large Language Models Are Scalable Judges](https://arxiv.org/pdf/2310.17631.pdf). It is designed to evaluate the performance of AI assistants.
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/openai_judgelm.py"
+```
+
+For the API reference visit [JudgeLMTask][distilabel.tasks.preference.judgelm.JudgeLMTask].
+
+#### UltraJudgeTask
+
+This class implements a `PreferenceTask` specifically for a better evaluation using AI Feedback. The task is defined based on both UltraFeedback and JudgeLM, but with several improvements/modifications.
+
+It introduces an additional argument to differentiate various areas for processing. While these areas can be customized (see the sketch at the end of this section), the default values are as follows:
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/ultrajudge.py"
+```
+
+This can be used directly in the following way:
+
+```python
+--8<-- "docs/snippets/technical-reference/tasks/openai_ultrajudge.py"
+```
+
+For the API reference visit [UltraJudgeTask][distilabel.tasks.preference.ultrajudge.UltraJudgeTask].
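+
+To close, the areas can be swapped for domain-specific ones. A minimal sketch, assuming the argument is named `areas` as in the default values shown above; the area names themselves are made up for illustration:
+
+```python
+from distilabel.tasks import UltraJudgeTask
+
+# Hypothetical customization: judge responses on code-review style areas
+# instead of the default ones
+task = UltraJudgeTask(
+    areas=[
+        "Correctness",
+        "Readability & Style",
+        "Security & Safety",
+    ]
+)
+```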
diff --git a/mkdocs.yml b/mkdocs.yml index b8ac6fa4f9..f4a68d9f0f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,13 +13,12 @@ extra_css: theme: name: material - logo: assets/distilabel-icon.svg + logo: assets/logo.svg favicon: assets/distilabel-icon.svg features: - navigation.instant - navigation.tabs - toc.follow - - toc.integrate - content.code.copy - content.code.annotate palette: @@ -60,6 +59,7 @@ markdown_extensions: - pymdownx.superfences - pymdownx.tabbed: alternate_style: true + - footnotes plugins: - search @@ -70,9 +70,21 @@ plugins: nav_file: SUMMARY.md - section-index - mkdocstrings + - mknotebooks nav: - Getting started: index.md - Concepts: concepts.md - - Guides: guides.md - - Code Reference: reference/ + - Learn: + - learn/index.md + - Tutorials: + - learn/tutorials/index.md + - User Guides: + - learn/user-guides/index.md + - Technical References: + - Concept Guides: + - technical-reference/index.md + - LLMs: technical-reference/llms.md + - Tasks: technical-reference/tasks.md + - Pipelines: technical-reference/pipeline.md + - API Reference: reference/ diff --git a/pyproject.toml b/pyproject.toml index b87a8e781b..8cb60158ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ docs = [ "mkdocstrings[python] >= 0.24.0", "mkdocs-literate-nav >= 0.6.1", "mkdocs-section-index >= 0.3.8", + "mkdocs-gen-files >= 0.5.0", ] [project.urls] @@ -57,6 +58,7 @@ path = "src/distilabel/__init__.py" line-length = 88 select = ["E", "W", "F", "I", "C", "B"] ignore = ["E501", "B905", "B008"] +exclude = ["docs"] [tool.pytest.ini_options] testpaths = ["tests"]