Skip to content

Commit

Permalink
Add basic examples for tasks to show in the components gallery (#724)
Browse files Browse the repository at this point in the history
  • Loading branch information
plaguss authored Jun 12, 2024
1 parent ae6d7fa commit ce8dde8
Show file tree
Hide file tree
Showing 15 changed files with 718 additions and 4 deletions.
26 changes: 26 additions & 0 deletions src/distilabel/steps/tasks/complexity_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,32 @@ class ComplexityScorer(Task):
References:
- [`What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning`](https://arxiv.org/abs/2312.15685)
Examples:
Evaluate the complexity of your instructions:
```python
from distilabel.steps.tasks import ComplexityScorer
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
scorer = ComplexityScorer(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
)
)
scorer.load()
result = next(
scorer.process(
[{"instructions": ["plain instruction", "highly complex instruction"]}]
)
)
# result
# [{'instructions': ['plain instruction', 'highly complex instruction'], 'model_name': 'test', 'scores': [1, 5], 'distilabel_metadata': {'raw_output_complexity_scorer_0': 'output'}}]
```
"""

_template: Union[Template, None] = PrivateAttr(...)
Expand Down
80 changes: 80 additions & 0 deletions src/distilabel/steps/tasks/evol_instruct/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,86 @@ class EvolInstruct(Task):
References:
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
- [GitHub: h2oai/h2o-wizardlm](https://github.com/h2oai/h2o-wizardlm)
Examples:
Evolve an instruction using an LLM:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [{'instruction': 'common instruction', 'evolved_instruction': 'evolved instruction', 'model_name': 'model_name'}]
```
Keep the iterations of the evolutions:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
store_evolutions=True,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [
# {
# 'instruction': 'common instruction',
# 'evolved_instructions': ['initial evolution', 'final evolution'],
# 'model_name': 'model_name'
# }
# ]
```
Generate answers for the instructions in a single step:
```python
from distilabel.steps.tasks import EvolInstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct = EvolInstruct(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
generate_answers=True,
)
evol_instruct.load()
result = next(evol_instruct.process([{"instruction": "common instruction"}]))
# result
# [
# {
# 'instruction': 'common instruction',
# 'evolved_instruction': 'evolved instruction',
# 'answer': 'answer to the instruction',
# 'model_name': 'model_name'
# }
# ]
```
"""

num_evolutions: int
Expand Down
25 changes: 24 additions & 1 deletion src/distilabel/steps/tasks/evol_instruct/evol_complexity/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class EvolComplexity(EvolInstruct):
"""Evolve instructions to make them more complex using an `LLM`.
`EvolComplexity` is a task that evolves instructions to make them more complex,
and it is based in the EvolInstruct task, but using slight different prompts, but the
and it is based on the EvolInstruct task, using slightly different prompts, but the
exact same evolutionary approach.
Attributes:
Expand Down Expand Up @@ -61,6 +61,29 @@ class EvolComplexity(EvolInstruct):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
Examples:
Evolve an instruction using an LLM:
```python
from distilabel.steps.tasks import EvolComplexity
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_complexity = EvolComplexity(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_complexity.load()
result = next(evol_complexity.process([{"instruction": "common instruction"}]))
# result
# [{'instruction': 'common instruction', 'evolved_instruction': 'evolved instruction', 'model_name': 'model_name'}]
```
"""

mutation_templates: Dict[str, str] = MUTATION_TEMPLATES
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,29 @@ class EvolComplexityGenerator(EvolInstructGenerator):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
Examples:
Generate evolved instructions without initial instructions:
```python
from distilabel.steps.tasks import EvolComplexityGenerator
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_complexity_generator = EvolComplexityGenerator(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_instructions=2,
)
evol_complexity_generator.load()
result = next(evol_complexity_generator.process())
# result
# [{'instruction': 'generated instruction', 'model_name': 'test'}]
```
"""

mutation_templates: Dict[str, str] = GENERATION_MUTATION_TEMPLATES
23 changes: 23 additions & 0 deletions src/distilabel/steps/tasks/evol_instruct/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,29 @@ class EvolInstructGenerator(GeneratorTask):
References:
- [WizardLM: Empowering Large Language Models to Follow Complex Instructions](https://arxiv.org/abs/2304.12244)
- [GitHub: h2oai/h2o-wizardlm](https://github.com/h2oai/h2o-wizardlm)
Examples:
Generate evolved instructions without initial instructions:
```python
from distilabel.steps.tasks import EvolInstructGenerator
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_instruct_generator = EvolInstructGenerator(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_instructions=2,
)
evol_instruct_generator.load()
result = next(evol_instruct_generator.process())
# result
# [{'instruction': 'generated instruction', 'model_name': 'test'}]
```
"""

num_instructions: int
Expand Down
36 changes: 36 additions & 0 deletions src/distilabel/steps/tasks/evol_quality/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,42 @@ class EvolQuality(Task):
References:
- [`What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning`](https://arxiv.org/abs/2312.15685)
Examples:
Evolve the quality of the responses given a prompt:
```python
from distilabel.steps.tasks import EvolQuality
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
evol_quality = EvolQuality(
llm=InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
),
num_evolutions=2,
)
evol_quality.load()
result = next(
evol_quality.process(
[
{"instruction": "common instruction", "response": "a response"},
]
)
)
# result
# [
# {
# 'instruction': 'common instruction',
# 'response': 'a response',
# 'evolved_response': 'evolved response',
# 'model_name': 'mistralai/Mistral-7B-Instruct-v0.2'
# }
# ]
```
"""

num_evolutions: int
Expand Down
27 changes: 27 additions & 0 deletions src/distilabel/steps/tasks/generate_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,33 @@ class GenerateEmbeddings(Step):
References:
- [What Makes Good Data for Alignment? A Comprehensive Study of Automatic Data Selection in Instruction Tuning](https://arxiv.org/abs/2312.15685)
Examples:
Generate embeddings from a text input:
```python
from distilabel.steps.tasks import GenerateEmbeddings
from distilabel.llms.huggingface import TransformersLLM
# Consider this as a placeholder for your actual LLM.
embedder = GenerateEmbeddings(
llm=TransformersLLM(
model="TaylorAI/bge-micro-v2",
model_kwargs={"is_decoder": True},
cuda_devices=[],
)
)
embedder.load()
result = next(
embedder.process(
[
{"text": "Hello, how are you?"},
]
)
)
```
"""

llm: LLM
Expand Down
36 changes: 36 additions & 0 deletions src/distilabel/steps/tasks/genstruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,42 @@ class Genstruct(Task):
References:
- [Genstruct 7B by Nous Research](https://huggingface.co/NousResearch/Genstruct-7B)
- [Ada-Instruct: Adapting Instruction Generators for Complex Reasoning](https://arxiv.org/abs/2310.04484)
Examples:
Generate instructions from raw documents using the title and content:
```python
from distilabel.steps.tasks import Genstruct
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Consider this as a placeholder for your actual LLM.
genstruct = Genstruct(
llm=InferenceEndpointsLLM(
model_id="NousResearch/Genstruct-7B",
),
)
genstruct.load()
result = next(
genstruct.process(
[
{"title": "common instruction", "content": "content of the document"},
]
)
)
# result
# [
# {
# 'title': 'An instruction',
# 'content': 'content of the document',
# 'model_name': 'test',
# 'user': 'An instruction',
# 'assistant': 'content of the document',
# }
# ]
```
"""

_template: Union[Template, None] = PrivateAttr(...)
Expand Down
31 changes: 31 additions & 0 deletions src/distilabel/steps/tasks/pair_rm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,37 @@ class PairRM(Step):
Note:
This step differs to other tasks as there is a single implementation of this model
currently, and we will use a specific `LLM`.
Examples:
Rank LLM candidates:
```python
from distilabel.steps.tasks import PairRM
# Consider this as a placeholder for your actual LLM.
pair_rm = PairRM()
pair_rm.load()
result = next(
pair_rm.process(
[
{"input": "Hello, how are you?", "candidates": ["fine", "good", "bad"]},
]
)
)
# result
# [
# {
# 'input': 'Hello, how are you?',
# 'candidates': ['fine', 'good', 'bad'],
# 'ranks': [2, 1, 3],
# 'ranked_candidates': ['good', 'fine', 'bad'],
# 'model_name': 'llm-blender/PairRM',
# }
# ]
```
"""

model: str = "llm-blender/PairRM"
Expand Down
Loading

0 comments on commit ce8dde8

Please sign in to comment.