Skip to content

Commit

Permalink
Add basic examples for tasks to show in the components gallery (#724)
Browse files Browse the repository at this point in the history
  • Loading branch information
plaguss authored Jun 12, 2024
1 parent ae6d7fa commit ce8dde8
Show file tree
Hide file tree
Showing 15 changed files with 718 additions and 4 deletions.
26 changes: 26 additions & 0 deletions src/distilabel/steps/tasks/complexity_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,32 @@ class ComplexityScorer(Task):
References:
- [`What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning`](https://arxiv.org/abs/2312.15685)
Examples:
Evaluate the complexity of your instructions:
```python
from distilabel.steps.tasks import ComplexityScorer
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
scorer = ComplexityScorer(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
)
)
scorer.load()
result = next(
scorer.process(
[{"instructions": ["plain instruction", "highly complex instruction"]}]
)
)
# result
# [{'instructions': ['plain instruction', 'highly complex instruction'], 'model_name': 'test', 'scores': [1, 5], 'distilabel_metadata': {'raw_output_complexity_scorer_0': 'output'}}]
```
"""

_template: Union[Template, None] = PrivateAttr(...)
Expand Down
80 changes: 80 additions & 0 deletions src/distilabel/steps/tasks/evol_instruct/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,86 @@ class EvolInstruct(Task):
References:
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
- [GitHub: h2oai/h2o-wizardlm](https://github.com/h2oai/h2o-wizardlm)
Examples:
Evolve an instruction using an LLM:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [{'instruction': 'common instruction', 'evolved_instruction': 'evolved instruction', 'model_name': 'model_name'}]
```
Keep the iterations of the evolutions:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
store_evolutions=True,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [
# {
# 'instruction': 'common instruction',
# 'evolved_instructions': ['initial evolution', 'final evolution'],
# 'model_name': 'model_name'
# }
# ]
```
Generate answers for the instructions in a single step:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
generate_answers=True,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [
# {
# 'instruction': 'common instruction',
# 'evolved_instruction': 'evolved instruction',
# 'answer': 'answer to the instruction',
# 'model_name': 'model_name'
# }
# ]
```
"""

num_evolutions: int
Expand Down
25 changes: 24 additions & 1 deletion src/distilabel/steps/tasks/evol_instruct/evol_complexity/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class EvolComplexity(EvolInstruct):
"""Evolve instructions to make them more complex using an `LLM`.
`EvolComplexity` is a task that evolves instructions to make them more complex,
and it is based in the EvolInstruct task, but using slight different prompts, but the
and it is based on the EvolInstruct task, using slightly different prompts, but the
exact same evolutionary approach.
Attributes:
Expand Down Expand Up @@ -61,6 +61,29 @@ class EvolComplexity(EvolInstruct):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
Examples:
Evolve an instruction using an LLM:
```python
from distilabel.steps.tasks import EvolComplexity
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_complexity = EvolComplexity(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_complexity.load()
result = next(evol_complexity.process([{"instruction": "common instruction"}]))
# result
# [{'instruction': 'common instruction', 'evolved_instruction': 'evolved instruction', 'model_name': 'model_name'}]
```
"""

mutation_templates: Dict[str, str] = MUTATION_TEMPLATES
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,29 @@ class EvolComplexityGenerator(EvolInstructGenerator):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
Examples:
Generate evolved instructions without initial instructions:
```python
from distilabel.steps.tasks import EvolComplexityGenerator
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_complexity_generator = EvolComplexityGenerator(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_instructions=2,
)
evol_complexity_generator.load()
result = next(evol_complexity_generator.process())
# result
# [{'instruction': 'generated instruction', 'model_name': 'test'}]
```
"""

mutation_templates: Dict[str, str] = GENERATION_MUTATION_TEMPLATES
23 changes: 23 additions & 0 deletions src/distilabel/steps/tasks/evol_instruct/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,29 @@ class EvolInstructGenerator(GeneratorTask):
References:
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
- [GitHub: h2oai/h2o-wizardlm](https://github.com/h2oai/h2o-wizardlm)
Examples:
Generate evolved instructions without initial instructions:
```python
from distilabel.steps.tasks import EvolInstructGenerator
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct_generator = EvolInstructGenerator(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_instructions=2,
)
evol_instruct_generator.load()
result = next(evol_instruct_generator.process())
# result
# [{'instruction': 'generated instruction', 'model_name': 'test'}]
```
"""

num_instructions: int
Expand Down
36 changes: 36 additions & 0 deletions src/distilabel/steps/tasks/evol_quality/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,42 @@ class EvolQuality(Task):
References:
- [`What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning`](https://arxiv.org/abs/2312.15685)
Examples:
Evolve the quality of the responses given a prompt:
```python
from distilabel.steps.tasks import EvolQuality
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_quality = EvolQuality(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_quality.load()
result = next(
evol_quality.process(
[
{"instruction": "common instruction", "response": "a response"},
]
)
)
# result
# [
# {
# 'instruction': 'common instruction',
# 'response': 'a response',
# 'evolved_response': 'evolved response',
# 'model_name': 'mistralai/Mistral-7B-Instruct-v0.2'
# }
# ]
```
"""

num_evolutions: int
Expand Down
27 changes: 27 additions & 0 deletions src/distilabel/steps/tasks/generate_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,33 @@ class GenerateEmbeddings(Step):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
Examples:
Generate embeddings from a text input:
```python
from distilabel.steps.tasks import GenerateEmbeddings
from distilabel.llms.huggingface import TransformersLLM
# Consider this as a placeholder for your actual LLM.
embedder = GenerateEmbeddings(
llm=TransformersLLM(
model="TaylorAI/bge-micro-v2",
model_kwargs={"is_decoder": True},
cuda_devices=[],
)
)
embedder.load()
result = next(
embedder.process(
[
{"text": "Hello, how are you?"},
]
)
)
```
"""

llm: LLM
Expand Down
36 changes: 36 additions & 0 deletions src/distilabel/steps/tasks/genstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,42 @@ class Genstruct(Task):
References:
- [Genstruct 7B by Nous Research](https://huggingface.co/NousResearch/Genstruct-7B)
- [Ada-Instruct: Adapting Instruction Generators for Complex Reasoning](https://arxiv.org/abs/2310.04484)
Examples:
Generate instructions from raw documents using the title and content:
```python
from distilabel.steps.tasks import Genstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
genstruct = Genstruct(
llm=InferenceEndpointsLLM(
model_id="NousResearch/Genstruct-7B",
),
)
genstruct.load()
result = next(
genstruct.process(
[
{"title": "common instruction", "content": "content of the document"},
]
)
)
# result
# [
# {
# 'title': 'An instruction',
# 'content': 'content of the document',
# 'model_name': 'test',
# 'user': 'An instruction',
# 'assistant': 'content of the document',
# }
# ]
```
"""

_template: Union[Template, None] = PrivateAttr(...)
Expand Down
31 changes: 31 additions & 0 deletions src/distilabel/steps/tasks/pair_rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,37 @@ class PairRM(Step):
Note:
This step differs to other tasks as there is a single implementation of this model
currently, and we will use a specific `LLM`.
Examples:
Rank LLM candidates:
```python
from distilabel.steps.tasks import PairRM
# Consider this as a placeholder for your actual LLM.
pair_rm = PairRM()
pair_rm.load()
result = next(
pair_rm.process(
[
{"input": "Hello, how are you?", "candidates": ["fine", "good", "bad"]},
]
)
)
# result
# [
# {
# 'input': 'Hello, how are you?',
# 'candidates': ['fine', 'good', 'bad'],
# 'ranks': [2, 1, 3],
# 'ranked_candidates': ['good', 'fine', 'bad'],
# 'model_name': 'llm-blender/PairRM',
# }
# ]
```
"""

model: str = "llm-blender/PairRM"
Expand Down
Loading

0 comments on commit ce8dde8

Please sign in to comment.