Skip to content

Commit

Permalink
Drop `LoadHubDataset`, which was deprecated and scheduled for removal in 1.3.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
davidberenstein1957 committed Jun 26, 2024
1 parent 4c0f319 commit b9165ef
Show file tree
Hide file tree
Showing 9 changed files with 11 additions and 39 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,14 @@ Then run:
```python
from distilabel.llms import OpenAILLM
from distilabel.pipeline import Pipeline
from distilabel.steps import LoadHubDataset
from distilabel.steps import LoadDataFromHub
from distilabel.steps.tasks import TextGeneration

with Pipeline(
name="simple-text-generation-pipeline",
description="A simple text generation pipeline",
) as pipeline:
load_dataset = LoadHubDataset(output_mappings={"prompt": "instruction"})
load_dataset = LoadDataFromHub(output_mappings={"prompt": "instruction"})

generate_with_openai = TextGeneration(llm=OpenAILLM(model="gpt-3.5-turbo"))

Expand Down
4 changes: 2 additions & 2 deletions examples/arena_hard.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def process(self, inputs: StepInput) -> StepOutput: # type: ignore
from distilabel.steps import (
CombineColumns,
KeepColumns,
LoadHubDataset,
LoadDataFromHub,
StepInput,
step,
)
Expand Down Expand Up @@ -369,7 +369,7 @@ def LoadReference(*inputs: StepInput) -> StepOutput:
yield input

with Pipeline(name="arena-hard-v0.1") as pipeline:
load_dataset = LoadHubDataset(
load_dataset = LoadDataFromHub(
name="load_dataset",
repo_id="alvarobartt/lmsys-arena-hard-v0.1",
split="test",
Expand Down
8 changes: 4 additions & 4 deletions src/distilabel/pipeline/routing_batch_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def routing_batch_function(
```python
from distilabel.llms import MistralLLM, OpenAILLM, VertexAILLM
from distilabel.pipeline import Pipeline, routing_batch_function
from distilabel.steps import LoadHubDataset, CombineColumns
from distilabel.steps import LoadDataFromHub, CombineColumns
@routing_batch_function
Expand All @@ -260,7 +260,7 @@ def random_routing_batch(steps: List[str]) -> List[str]:
with Pipeline(name="routing-batch-function") as pipeline:
load_data = LoadHubDataset()
load_data = LoadDataFromHub()
generations = []
for llm in (
Expand Down Expand Up @@ -336,14 +336,14 @@ def sample_n_steps(n: int) -> RoutingBatchFunction:
```python
from distilabel.llms import MistralLLM, OpenAILLM, VertexAILLM
from distilabel.pipeline import Pipeline, sample_n_steps
from distilabel.steps import LoadHubDataset, CombineColumns
from distilabel.steps import LoadDataFromHub, CombineColumns
random_routing_batch = sample_n_steps(2)
with Pipeline(name="routing-batch-function") as pipeline:
load_data = LoadHubDataset()
load_data = LoadDataFromHub()
generations = []
for llm in (
Expand Down
2 changes: 0 additions & 2 deletions src/distilabel/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
LoadDataFromDisk,
LoadDataFromFileSystem,
LoadDataFromHub,
LoadHubDataset,
)
from distilabel.steps.globals.huggingface import PushToHub
from distilabel.steps.keep import KeepColumns
Expand All @@ -58,7 +57,6 @@
"LoadDataFromDisk",
"LoadDataFromFileSystem",
"LoadDataFromHub",
"LoadHubDataset",
"PushToHub",
"Step",
"StepInput",
Expand Down
11 changes: 0 additions & 11 deletions src/distilabel/steps/generators/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings
from collections import defaultdict
from functools import cached_property
from pathlib import Path
Expand Down Expand Up @@ -242,16 +241,6 @@ def _dataset_info(self) -> Dict[str, DatasetInfo]:
return ds.info


class LoadHubDataset(LoadDataFromHub):
    """Deprecated name for `LoadDataFromHub`.

    Constructing this class emits a `DeprecationWarning` and then initialises
    the parent `LoadDataFromHub` with the very same keyword arguments.
    """

    def __init__(self, **data: Any) -> None:
        """Emit the deprecation warning and delegate to the parent initialiser.

        Args:
            **data: keyword arguments forwarded unchanged to
                `LoadDataFromHub.__init__`.
        """
        deprecation_message = "`LoadHubDataset` is deprecated and will be removed in version 1.3.0, use `LoadDataFromHub` instead."
        # `stacklevel=2` attributes the warning to the frame above this
        # `__init__` rather than to this line.
        warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)
        super().__init__(**data)


class LoadDataFromFileSystem(LoadDataFromHub):
"""Loads a dataset from a file in your filesystem.
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_pipe_llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from distilabel.mixins.runtime_parameters import RuntimeParameter
from distilabel.pipeline.local import Pipeline
from distilabel.steps.base import Step, StepInput
from distilabel.steps.generators.huggingface import LoadHubDataset
from distilabel.steps.generators.huggingface import LoadDataFromHub
from distilabel.steps.tasks.text_generation import TextGeneration

if TYPE_CHECKING:
Expand Down Expand Up @@ -51,7 +51,7 @@ def process(self, *inputs: StepInput) -> "StepOutput":

def test_pipeline_with_llms_serde() -> None:
with Pipeline(name="unit-test-pipeline") as pipeline:
load_hub_dataset = LoadHubDataset(name="load_dataset")
load_hub_dataset = LoadDataFromHub(name="load_dataset")
rename_columns = RenameColumns(name="rename_columns")
load_hub_dataset.connect(rename_columns)

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/cli/test_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pipeline:
only needed if the dataset has multiple configurations.
type_info:
module: distilabel.steps.generators.huggingface
name: LoadHubDataset
name: LoadDataFromHub
name: load_hub_dataset
- step:
name: text_generation_gpt
Expand Down
14 changes: 0 additions & 14 deletions tests/unit/steps/generators/test_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
LoadDataFromDisk,
LoadDataFromFileSystem,
LoadDataFromHub,
LoadHubDataset,
)

DISTILABEL_RUN_SLOW_TESTS = os.getenv("DISTILABEL_RUN_SLOW_TESTS", False)
Expand Down Expand Up @@ -183,16 +182,3 @@ def test_load_distiset_from_disk(self) -> None:
assert isinstance(generator_step_output, tuple)
assert isinstance(generator_step_output[1], bool)
assert len(generator_step_output[0]) == 3


def test_LoadHubDataset_deprecation_warning():
    """Check that `LoadHubDataset` warns on construction and is removed on time.

    The test has two parts:

    1. Instantiating `LoadHubDataset` must trigger a deprecation warning.
    2. The installed `distilabel` version must still be strictly below 1.3.0,
       the release in which the warning message says the class is removed.
       Once the version reaches 1.3.0 this assertion fails, which acts as a
       reminder to delete the deprecated class together with this test.
    """
    with pytest.deprecated_call():
        LoadHubDataset(
            repo_id="distilabel-internal-testing/instruction-dataset-mini",
            split="test",
            batch_size=2,
        )
    import distilabel
    from packaging.version import Version

    # Strict comparison: the class is announced as removed *in* 1.3.0, so the
    # guard must already fail at 1.3.0 itself (a `<=` check would let the
    # deprecated class ship in that release unnoticed).
    assert Version(distilabel.__version__) < Version("1.3.0")
1 change: 0 additions & 1 deletion tests/unit/test_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def test_imports() -> None:
LoadDataFromDicts,
LoadDataFromHub,
LoadDataFromDisk,
LoadHubDataset,
PushToHub,
Step,
StepOutput,
Expand Down

0 comments on commit b9165ef

Please sign in to comment.