From c9ed5fde5e7bb18132a088df678f23e886d17e1d Mon Sep 17 00:00:00 2001 From: bikash119 Date: Tue, 24 Sep 2024 13:58:03 +0530 Subject: [PATCH 01/27] Support embeddings generation using llama_cpp --- .gitignore | 4 +- src/distilabel/embeddings/llamacpp.py | 129 +++++++++++++++++++++++++ tests/unit/conftest.py | 11 +++ tests/unit/embeddings/test_llamacpp.py | 92 ++++++++++++++++++ 4 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 src/distilabel/embeddings/llamacpp.py create mode 100644 tests/unit/embeddings/test_llamacpp.py diff --git a/.gitignore b/.gitignore index 42967a7edb..93707388c7 100644 --- a/.gitignore +++ b/.gitignore @@ -77,4 +77,6 @@ venv.bak/ # Other *.log *.swp -.DS_Store \ No newline at end of file +.DS_Store +#models +tests/model diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py new file mode 100644 index 0000000000..66d2c5444b --- /dev/null +++ b/src/distilabel/embeddings/llamacpp.py @@ -0,0 +1,129 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +from pydantic import PrivateAttr + +from distilabel.embeddings.base import Embeddings +from distilabel.llms.mixins.cuda_device_placement import CudaDevicePlacementMixin + +if TYPE_CHECKING: + from llama_cpp import Llama as _LlamaCpp + + +class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): + """`LlamaCpp` library implementation for embedding generation. + + Attributes: + model: the model Hugging Face Hub repo id or a path to a directory containing the + model weights and configuration files. + hub_repository_id: the Hugging Face Hub repository id. + _model: the `Llama` model instance. This attribute is meant to be used internally + and should not be accessed directly. It will be set in the `load` method. + + References: + - [Offline inference embeddings](https://llama-cpp-python.readthedocs.io/en/stable/#embeddings) + + Examples: + Generating sentence embeddings: + + ```python + from distilabel.embeddings import LlamaCppEmbeddings + + embeddings = LlamaCppEmbeddings(model="second-state/all-MiniLM-L6-v2-Q2_K.gguf") + + embeddings.load() + + results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + # [ + # [-0.05447685346007347, -0.01623094454407692, ...], + # [4.4889533455716446e-05, 0.044016145169734955, ...], + # ] + ``` + """ + + model: str + hub_repository_id: Union[None, str] = None + disable_cuda_device_placement: bool = True + model_kwargs: Optional[Dict[str, Any]] = None + verbose: bool = False + _model: Union["_LlamaCpp", None] = PrivateAttr(None) + + def load(self) -> None: + """Loads the `gguf` model using either the path or the Hugging Face Hub repository id.""" + super().load() + + CudaDevicePlacementMixin.load(self) + + try: + from llama_cpp import Llama as _LlamaCpp + except ImportError as ie: + raise ImportError( + "`llama-cpp-python` package is not installed. 
Please install it using" + " `pip install llama-cpp-python`." + ) from ie + + if self.hub_repository_id is not None: + self._model = _LlamaCpp.from_pretrained( + repo_id=self.hub_repository_id, + filename=self.model, + verbose=self.verbose, + embedding=True, + ) + else: + try: + self._logger.info(f"Attempting to load model from: {self.model_name}") + self._model = _LlamaCpp( + model_path=self.model_name, + verbose=self.verbose, + embedding=True, + kwargs=self.model_kwargs, + ) + self._logger.info(f"self._model: {self._model}") + self._logger.info("Model loaded successfully") + except Exception as e: + self._logger.error(f"Failed to load model: {str(e)}") + raise + + def unload(self) -> None: + """Unloads the `gguf` model.""" + CudaDevicePlacementMixin.unload(self) + super().unload() + + @property + def model_name(self) -> str: + """Returns the name of the model.""" + return self.model + + def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: + """Generates embeddings for the provided inputs. + + Args: + inputs: a list of texts for which an embedding has to be generated. + + Returns: + The generated embeddings. + """ + if self._model is None: + self._logger.error("Model is not initialized") + raise ValueError( + "Model is not initialized. Please check the initialization process." + ) + + try: + return self._model.create_embedding(inputs)["data"] + except Exception as e: + print(f"Error creating embedding: {str(e)}") + raise diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 1903d10e3c..eb4eabc58e 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -102,3 +102,14 @@ class DummyTaskOfflineBatchGeneration(DummyTask): @pytest.fixture def dummy_llm() -> AsyncLLM: return DummyAsyncLLM() + + +@pytest.fixture +def local_llamacpp_model_path(): + """ + Fixture that provides the local model path for LlamaCpp testing. + + Returns: + str: The path to the local LlamaCpp model file. + """ + return "./tests/model/gguf/all-MiniLM-L6-v2-Q2_K.gguf" diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py new file mode 100644 index 0000000000..9b66230494 --- /dev/null +++ b/tests/unit/embeddings/test_llamacpp.py @@ -0,0 +1,92 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from distilabel.embeddings.llamacpp import LlamaCppEmbeddings + + +class TestLlamaCppEmbeddings: + model_name = "all-MiniLM-L6-v2-Q2_K.gguf" + repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" + + def test_model_name(self) -> None: + """ + Test if the model name is correctly set. + """ + embeddings = LlamaCppEmbeddings(model=self.model_name) + assert embeddings.model_name == self.model_name + + def test_encode(self, local_llamacpp_model_path) -> None: + """ + Test if the model can generate embeddings. + + Args: + local_llamacpp_model_path (str): Fixture providing the local model path. 
+ """ + embeddings = LlamaCppEmbeddings(model=local_llamacpp_model_path) + inputs = [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + embeddings.load() + results = embeddings.encode(inputs=inputs) + + for result in results: + assert len(result["embedding"]) == 384 + + def test_load_model_from_local(self, local_llamacpp_model_path): + """ + Test if the model can be loaded from a local file and generate embeddings. + + Args: + local_llamacpp_model_path (str): Fixture providing the local model path. + """ + embeddings = LlamaCppEmbeddings(model=local_llamacpp_model_path) + inputs = [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + embeddings.load() + # Test if the model is loaded by generating an embedding + results = embeddings.encode(inputs=inputs) + + embeddings.load() + results = embeddings.encode(inputs=inputs) + + for result in results: + assert len(result["embedding"]) == 384 + + def test_load_model_from_repo(self): + """ + Test if the model can be loaded from a Hugging Face repository. + """ + embeddings = LlamaCppEmbeddings( + hub_repository_id=self.repo_id, model=self.model_name + ) + inputs = [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + + embeddings.load() + # Test if the model is loaded by generating an embedding + results = embeddings.encode(inputs=inputs) + + embeddings.load() + results = embeddings.encode(inputs=inputs) + + for result in results: + assert len(result["embedding"]) == 384 From c3464bc782b6d6f11a0a18d3ad3eed30a266adef Mon Sep 17 00:00:00 2001 From: bikash119 Date: Tue, 24 Sep 2024 14:18:33 +0530 Subject: [PATCH 02/27] Added llama-cpp-python as optional dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 44404c683e..7d39bd46b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ vllm = [ "setuptools", ] sentence-transformers = ["sentence-transformers >= 3.0.0"] +llama_cpp_python = ["llama-cpp-python >= 0.2.90"] faiss-cpu = ["faiss-cpu >= 1.8.0"] faiss-gpu = ["faiss-gpu >= 1.7.2"] text-clustering = [ From 582ca407f6d2f78bbb118de1699a916953ba3311 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Wed, 25 Sep 2024 19:28:30 +0530 Subject: [PATCH 03/27] - Added normalize_embeddings argument to allow user to pass if the embeddings should be normalized - Added testcases to test normalize embeddings --- src/distilabel/embeddings/llamacpp.py | 86 ++++++++++++++++++++------ tests/unit/embeddings/test_llamacpp.py | 66 +++++++++++++++++--- 2 files changed, 127 insertions(+), 25 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 66d2c5444b..69de88424f 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Union -from pydantic import PrivateAttr +from pydantic import Field, PrivateAttr from distilabel.embeddings.base import Embeddings from distilabel.llms.mixins.cuda_device_placement import CudaDevicePlacementMixin +from distilabel.mixins.runtime_parameters import RuntimeParameter if TYPE_CHECKING: from llama_cpp import Llama as _LlamaCpp @@ -27,9 +28,14 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """`LlamaCpp` library implementation for embedding generation. Attributes: - model: the model Hugging Face Hub repo id or a path to a directory containing the - model weights and configuration files. + model: contains the path to the GGUF quantized model, compatible with the + installed version of the `llama.cpp` Python bindings. hub_repository_id: the Hugging Face Hub repository id. + verbose: whether to print verbose output. Defaults to `False`. + disable_cuda_device_placement: whether to disable CUDA device placement. Defaults to `True`. + normalize_embeddings: whether to normalize the embeddings. Defaults to `False`. + extra_kwargs: additional dictionary of keyword arguments that will be passed to the + `Llama` class of `llama_cpp` library. Defaults to `{}`. _model: the `Llama` model instance. This attribute is meant to be used internally and should not be accessed directly. It will be set in the `load` method. @@ -42,7 +48,11 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ```python from distilabel.embeddings import LlamaCppEmbeddings - embeddings = LlamaCppEmbeddings(model="second-state/all-MiniLM-L6-v2-Q2_K.gguf") + embeddings = LlamaCppEmbeddings(model="/path/to/model.gguf") + + ## Hugging Face Hub + + ## embeddings = LlamaCppEmbeddings(hub_repository_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF", model="all-MiniLM-L6-v2-Q2_K.gguf") embeddings.load() @@ -54,11 +64,30 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ``` """ - model: str - hub_repository_id: Union[None, str] = None - disable_cuda_device_placement: bool = True - model_kwargs: Optional[Dict[str, Any]] = None - verbose: bool = False + model: RuntimeParameter[str] = Field( + default=None, + description="Contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings.", + ) + hub_repository_id: RuntimeParameter[Union[None, str]] = Field( + default=None, + description="The Hugging Face Hub repository id.", + ) + disable_cuda_device_placement: RuntimeParameter[bool] = Field( + default=True, + description="Whether to disable CUDA device placement.", + ) + verbose: RuntimeParameter[bool] = Field( + default=False, + description="Whether to print verbose output from llama.cpp library.", + ) + extra_kwargs: RuntimeParameter[Dict[str, Any]] = Field( + default={}, + description="Additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library.", + ) + normalize_embeddings: RuntimeParameter[bool] = Field( + default=False, + description="Whether to normalize the embeddings.", + ) _model: Union["_LlamaCpp", None] = PrivateAttr(None) def load(self) -> None: @@ -76,12 +105,32 @@ def load(self) -> None: ) from ie if self.hub_repository_id is not None: - self._model = _LlamaCpp.from_pretrained( - repo_id=self.hub_repository_id, - filename=self.model, - verbose=self.verbose, - embedding=True, - ) + try: + from huggingface_hub.utils import validate_repo_id + + 
validate_repo_id(self.hub_repository_id) + except ImportError as ie: + raise ImportError( + "Llama.from_pretrained requires the huggingface-hub package. " + "You can install it with `pip install huggingface-hub`." + ) from ie + try: + self._logger.info( + f"Attempting to load model from Hugging Face Hub: {self.hub_repository_id}" + ) + self._model = _LlamaCpp.from_pretrained( + repo_id=self.hub_repository_id, + filename=self.model, + verbose=self.verbose, + embedding=True, + kwargs=self.extra_kwargs, + ) + self._logger.info("Model loaded successfully from Hugging Face Hub") + except Exception as e: + self._logger.error( + f"Failed to load model from Hugging Face Hub: {str(e)}" + ) + raise else: try: self._logger.info(f"Attempting to load model from: {self.model_name}") @@ -89,7 +138,7 @@ def load(self) -> None: model_path=self.model_name, verbose=self.verbose, embedding=True, - kwargs=self.model_kwargs, + kwargs=self.extra_kwargs, ) self._logger.info(f"self._model: {self._model}") self._logger.info("Model loaded successfully") @@ -123,7 +172,8 @@ def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: ) try: - return self._model.create_embedding(inputs)["data"] + embeds = self._model.embed(inputs, normalize=self.normalize_embeddings) + return embeds except Exception as e: print(f"Error creating embedding: {str(e)}") raise diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 9b66230494..44782b6178 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np + from distilabel.embeddings.llamacpp import LlamaCppEmbeddings @@ -43,7 +45,7 @@ def test_encode(self, local_llamacpp_model_path) -> None: results = embeddings.encode(inputs=inputs) for result in results: - assert len(result["embedding"]) == 384 + assert len(result) == 384 def test_load_model_from_local(self, local_llamacpp_model_path): """ @@ -62,18 +64,17 @@ def test_load_model_from_local(self, local_llamacpp_model_path): # Test if the model is loaded by generating an embedding results = embeddings.encode(inputs=inputs) - embeddings.load() - results = embeddings.encode(inputs=inputs) - for result in results: - assert len(result["embedding"]) == 384 + assert len(result) == 384 def test_load_model_from_repo(self): """ Test if the model can be loaded from a Hugging Face repository. """ embeddings = LlamaCppEmbeddings( - hub_repository_id=self.repo_id, model=self.model_name + hub_repository_id=self.repo_id, + model=self.model_name, + normalize_embeddings=True, ) inputs = [ "Hello, how are you?", @@ -85,8 +86,59 @@ def test_load_model_from_repo(self): # Test if the model is loaded by generating an embedding results = embeddings.encode(inputs=inputs) + for result in results: + assert len(result) == 384 + + def test_normalize_embeddings_true(self, local_llamacpp_model_path): + """ + Test if embeddings are normalized when normalize_embeddings is True. 
+ """ + embeddings = LlamaCppEmbeddings( + model=local_llamacpp_model_path, normalize_embeddings=True + ) + embeddings.load() + + inputs = [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + + results = embeddings.encode(inputs=inputs) + + for result in results: + # Check if the embedding is normalized (L2 norm should be close to 1) + norm = np.linalg.norm(result) + assert np.isclose( + norm, 1.0, atol=1e-6 + ), f"Norm is {norm}, expected close to 1.0" + + def test_normalize_embeddings_false(self, local_llamacpp_model_path): + """ + Test if embeddings are not normalized when normalize_embeddings is False. + """ + embeddings = LlamaCppEmbeddings( + model=local_llamacpp_model_path, normalize_embeddings=False + ) embeddings.load() + + inputs = [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + results = embeddings.encode(inputs=inputs) for result in results: - assert len(result["embedding"]) == 384 + # Check if the embedding is not normalized (L2 norm should not be close to 1) + norm = np.linalg.norm(result) + assert not np.isclose( + norm, 1.0, atol=1e-6 + ), f"Norm is {norm}, expected not close to 1.0" + + # Additional check: ensure that at least one embedding has a norm significantly different from 1 + norms = [np.linalg.norm(result) for result in results] + assert any( + not np.isclose(norm, 1.0, atol=0.1) for norm in norms + ), "Expected at least one embedding with norm not close to 1.0" From fba8adacb9edea4429ab9f9e06c2977bc3876d93 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 08:08:56 +0530 Subject: [PATCH 04/27] Update pyproject.toml Accept recommended suggestion Co-authored-by: David Berenstein --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7d39bd46b0..44404c683e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,6 @@ vllm = [ "setuptools", ] sentence-transformers = ["sentence-transformers >= 3.0.0"] -llama_cpp_python = ["llama-cpp-python >= 0.2.90"] faiss-cpu = ["faiss-cpu >= 1.8.0"] faiss-gpu = ["faiss-gpu >= 1.7.2"] text-clustering = [ From e288b313e000ca375755a6c63f6f828d5849386d Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 09:32:11 +0530 Subject: [PATCH 05/27] - Updated test to allow developer to define test model location. - Incorporated changes suggested in review comments. --- src/distilabel/embeddings/llamacpp.py | 13 +-------- src/distilabel/llms/llamacpp.py | 6 ++--- tests/unit/conftest.py | 38 ++++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 69de88424f..6cdecc6eb7 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -165,15 +165,4 @@ def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: Returns: The generated embeddings. """ - if self._model is None: - self._logger.error("Model is not initialized") - raise ValueError( - "Model is not initialized. Please check the initialization process." 
- ) - - try: - embeds = self._model.embed(inputs, normalize=self.normalize_embeddings) - return embeds - except Exception as e: - print(f"Error creating embedding: {str(e)}") - raise + return self._model.embed(inputs, normalize=self.normalize_embeddings) diff --git a/src/distilabel/llms/llamacpp.py b/src/distilabel/llms/llamacpp.py index 9d158ea525..2d52f56ac8 100644 --- a/src/distilabel/llms/llamacpp.py +++ b/src/distilabel/llms/llamacpp.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from pydantic import Field, FilePath, PrivateAttr, validate_call +from pydantic import Field, PrivateAttr, validate_call from distilabel.llms.base import LLM from distilabel.llms.typing import GenerateOutput @@ -110,9 +110,7 @@ class User(BaseModel): ``` """ - model_path: RuntimeParameter[FilePath] = Field( - default=None, description="The path to the GGUF quantized model.", exclude=True - ) + model_path: str n_gpu_layers: RuntimeParameter[int] = Field( default=-1, description="The number of layers that will be loaded in the GPU.", diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index eb4eabc58e..f0db9d82a5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import TYPE_CHECKING, Any, Dict, List, Union +from urllib.request import urlretrieve import pytest @@ -105,11 +107,39 @@ def dummy_llm() -> AsyncLLM: @pytest.fixture -def local_llamacpp_model_path(): +def local_llamacpp_model_path(tmp_path): """ - Fixture that provides the local model path for LlamaCpp testing. + Fixture that provides the local model path for LlamaCpp testing and handles cleanup. - Returns: + The model path can be set using the LLAMACPP_TEST_MODEL_PATH environment variable. + If not set, it downloads a small test model to a temporary directory and cleans up after the test. + + Args: + tmp_path (Path): Pytest fixture providing a temporary directory path. + + Yields: str: The path to the local LlamaCpp model file. """ - return "./tests/model/gguf/all-MiniLM-L6-v2-Q2_K.gguf" + # Check for environment variable first + env_path = os.environ.get("LLAMACPP_TEST_MODEL_PATH") + if env_path: + yield env_path + return # No cleanup needed if env var is set + + # If env var not set, use a small test model + model_name = "all-MiniLM-L6-v2-Q2_K.gguf" + model_url = f"https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/{model_name}" + model_path = tmp_path / model_name + + if not model_path.exists(): + print(f"Downloading test model to {model_path}...") + urlretrieve(model_url, model_path) + print("Download complete.") + + yield str(model_path) + + # Cleanup + print(f"Cleaning up downloaded model at {model_path}...") + if model_path.exists(): + os.remove(model_path) + print("Cleanup complete.") From a936a39dd0a1708528bdaf0e0f0d4e8d6bb92438 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 09:48:18 +0530 Subject: [PATCH 06/27] - Made the test session scope - use atexit to forcefully invoke cleanup --- tests/unit/conftest.py | 52 ++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index f0db9d82a5..8425801499 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import atexit import os from typing import TYPE_CHECKING, Any, Dict, List, Union from urllib.request import urlretrieve @@ -106,29 +107,49 @@ def dummy_llm() -> AsyncLLM: return DummyAsyncLLM() -@pytest.fixture -def local_llamacpp_model_path(tmp_path): +@pytest.fixture(scope="session") +def local_llamacpp_model_path(tmp_path_factory): """ - Fixture that provides the local model path for LlamaCpp testing and handles cleanup. + Session-scoped fixture that provides the local model path for LlamaCpp testing. The model path can be set using the LLAMACPP_TEST_MODEL_PATH environment variable. - If not set, it downloads a small test model to a temporary directory and cleans up after the test. + If not set, it downloads a small test model to a temporary directory. + The model is downloaded once per test session and cleaned up after all tests. + + To use a custom model: + 1. Set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file. + 2. Ensure the model file exists at the specified path. + + Example: + export LLAMACPP_TEST_MODEL_PATH="/path/to/your/model.gguf" Args: - tmp_path (Path): Pytest fixture providing a temporary directory path. + tmp_path_factory: Pytest fixture providing a temporary directory factory. - Yields: + Returns: str: The path to the local LlamaCpp model file. """ + print("\nLlamaCpp model path information:") + # Check for environment variable first env_path = os.environ.get("LLAMACPP_TEST_MODEL_PATH") if env_path: - yield env_path - return # No cleanup needed if env var is set + print(f"Using custom model path from LLAMACPP_TEST_MODEL_PATH: {env_path}") + if not os.path.exists(env_path): + raise FileNotFoundError( + f"Custom model file not found at {env_path}. Please ensure the file exists." + ) + return env_path + + print("LLAMACPP_TEST_MODEL_PATH not set. Using default test model.") + print( + "To use a custom model, set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file." 
+ ) # If env var not set, use a small test model model_name = "all-MiniLM-L6-v2-Q2_K.gguf" model_url = f"https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/{model_name}" + tmp_path = tmp_path_factory.getbasetemp() model_path = tmp_path / model_name if not model_path.exists(): @@ -136,10 +157,13 @@ def local_llamacpp_model_path(tmp_path): urlretrieve(model_url, model_path) print("Download complete.") - yield str(model_path) + def cleanup(): + if model_path.exists(): + print(f"Cleaning up downloaded model at {model_path}...") + os.remove(model_path) + print("Cleanup complete.") + + # Register the cleanup function to be called at exit + atexit.register(cleanup) - # Cleanup - print(f"Cleaning up downloaded model at {model_path}...") - if model_path.exists(): - os.remove(model_path) - print("Cleanup complete.") + return str(model_path) From 316afa0e6566b0049a923829aadefd56ae949868 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 10:10:59 +0530 Subject: [PATCH 07/27] - Reverted the changes made to model_path --- src/distilabel/llms/llamacpp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/distilabel/llms/llamacpp.py b/src/distilabel/llms/llamacpp.py index 2d52f56ac8..9d158ea525 100644 --- a/src/distilabel/llms/llamacpp.py +++ b/src/distilabel/llms/llamacpp.py @@ -14,7 +14,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from pydantic import Field, PrivateAttr, validate_call +from pydantic import Field, FilePath, PrivateAttr, validate_call from distilabel.llms.base import LLM from distilabel.llms.typing import GenerateOutput @@ -110,7 +110,9 @@ class User(BaseModel): ``` """ - model_path: str + model_path: RuntimeParameter[FilePath] = Field( + default=None, description="The path to the GGUF quantized model.", exclude=True + ) n_gpu_layers: RuntimeParameter[int] = Field( default=-1, description="The number of layers that will be loaded in the GPU.", From 71378839136941dd7596f9144d64dcdff3f6da4d Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 10:57:47 +0530 Subject: [PATCH 08/27] - Implement test_encode_batch to verify various batch sizes - Add test_encode_batch_consistency to ensure consistent results - Test large batch processing capability - Verify embedding dimensions and count for different batch sizes --- src/distilabel/embeddings/llamacpp.py | 53 +++++++++++-------- tests/unit/embeddings/test_llamacpp.py | 70 +++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 28 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 6cdecc6eb7..39c1806f02 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -28,12 +28,16 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """`LlamaCpp` library implementation for embedding generation. Attributes: - model: contains the path to the GGUF quantized model, compatible with the + model_path: contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings. - hub_repository_id: the Hugging Face Hub repository id. + repo_id: the Hugging Face Hub repository id. verbose: whether to print verbose output. Defaults to `False`. + n_gpu_layers: number of layers to run on the GPU. Defaults to `-1` (use the GPU if available). disable_cuda_device_placement: whether to disable CUDA device placement. Defaults to `True`. normalize_embeddings: whether to normalize the embeddings. 
Defaults to `False`. + seed: RNG seed, -1 for random + n_ctx: Text context, 0 = from model + n_batch: Prompt processing maximum batch size extra_kwargs: additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library. Defaults to `{}`. _model: the `Llama` model instance. This attribute is meant to be used internally @@ -52,7 +56,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ## Hugging Face Hub - ## embeddings = LlamaCppEmbeddings(hub_repository_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF", model="all-MiniLM-L6-v2-Q2_K.gguf") + ## embeddings = LlamaCppEmbeddings(repo_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF", model="all-MiniLM-L6-v2-Q2_K.gguf") embeddings.load() @@ -64,14 +68,12 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ``` """ - model: RuntimeParameter[str] = Field( - default=None, - description="Contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings.", - ) - hub_repository_id: RuntimeParameter[Union[None, str]] = Field( + model_path: str + repo_id: RuntimeParameter[Union[None, str]] = Field( default=None, description="The Hugging Face Hub repository id.", ) + n_gpu_layers: int = 0 disable_cuda_device_placement: RuntimeParameter[bool] = Field( default=True, description="Whether to disable CUDA device placement.", @@ -80,14 +82,17 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): default=False, description="Whether to print verbose output from llama.cpp library.", ) - extra_kwargs: RuntimeParameter[Dict[str, Any]] = Field( - default={}, - description="Additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library.", - ) normalize_embeddings: RuntimeParameter[bool] = Field( default=False, description="Whether to normalize the embeddings.", ) + seed: int = 4294967295 + n_ctx: int = 512 + n_batch: int = 512 + extra_kwargs: RuntimeParameter[Dict[str, Any]] = Field( + default={}, + description="Additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library.", + ) _model: Union["_LlamaCpp", None] = PrivateAttr(None) def load(self) -> None: @@ -104,11 +109,11 @@ def load(self) -> None: " `pip install llama-cpp-python`." ) from ie - if self.hub_repository_id is not None: + if self.repo_id is not None: try: from huggingface_hub.utils import validate_repo_id - validate_repo_id(self.hub_repository_id) + validate_repo_id(self.repo_id) except ImportError as ie: raise ImportError( "Llama.from_pretrained requires the huggingface-hub package. 
" @@ -116,11 +121,15 @@ def load(self) -> None: ) from ie try: self._logger.info( - f"Attempting to load model from Hugging Face Hub: {self.hub_repository_id}" + f"Attempting to load model from Hugging Face Hub: {self.repo_id}" ) self._model = _LlamaCpp.from_pretrained( - repo_id=self.hub_repository_id, - filename=self.model, + repo_id=self.repo_id, + filename=self.model_path, + n_gpu_layers=self.n_gpu_layers, + seed=self.seed, + n_ctx=self.n_ctx, + n_batch=self.n_batch, verbose=self.verbose, embedding=True, kwargs=self.extra_kwargs, @@ -133,9 +142,13 @@ def load(self) -> None: raise else: try: - self._logger.info(f"Attempting to load model from: {self.model_name}") + self._logger.info(f"Attempting to load model from: {self.model_path}") self._model = _LlamaCpp( - model_path=self.model_name, + model_path=self.model_path, + seed=self.seed, + n_gpu_layers=self.n_gpu_layers, + n_ctx=self.n_ctx, + n_batch=self.n_batch, verbose=self.verbose, embedding=True, kwargs=self.extra_kwargs, @@ -154,7 +167,7 @@ def unload(self) -> None: @property def model_name(self) -> str: """Returns the name of the model.""" - return self.model + return self.model_path def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: """Generates embeddings for the provided inputs. diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 44782b6178..f7cc9eb1af 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -25,8 +25,8 @@ def test_model_name(self) -> None: """ Test if the model name is correctly set. """ - embeddings = LlamaCppEmbeddings(model=self.model_name) - assert embeddings.model_name == self.model_name + embeddings = LlamaCppEmbeddings(model_path=self.model_name) + assert embeddings.model_path == self.model_name def test_encode(self, local_llamacpp_model_path) -> None: """ @@ -35,7 +35,7 @@ def test_encode(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -54,7 +54,7 @@ def test_load_model_from_local(self, local_llamacpp_model_path): Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -72,8 +72,8 @@ def test_load_model_from_repo(self): Test if the model can be loaded from a Hugging Face repository. """ embeddings = LlamaCppEmbeddings( - hub_repository_id=self.repo_id, - model=self.model_name, + repo_id=self.repo_id, + model_path=self.model_name, normalize_embeddings=True, ) inputs = [ @@ -94,7 +94,7 @@ def test_normalize_embeddings_true(self, local_llamacpp_model_path): Test if embeddings are normalized when normalize_embeddings is True. """ embeddings = LlamaCppEmbeddings( - model=local_llamacpp_model_path, normalize_embeddings=True + model_path=local_llamacpp_model_path, normalize_embeddings=True ) embeddings.load() @@ -118,7 +118,7 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): Test if embeddings are not normalized when normalize_embeddings is False. 
""" embeddings = LlamaCppEmbeddings( - model=local_llamacpp_model_path, normalize_embeddings=False + model_path=local_llamacpp_model_path, normalize_embeddings=False ) embeddings.load() @@ -142,3 +142,57 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): assert any( not np.isclose(norm, 1.0, atol=0.1) for norm in norms ), "Expected at least one embedding with norm not close to 1.0" + + def test_encode_batch(self, local_llamacpp_model_path) -> None: + """ + Test if the model can generate embeddings for batches of inputs. + + Args: + local_llamacpp_model_path (str): Fixture providing the local model path. + """ + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings.load() + + # Test with different batch sizes + batch_sizes = [1, 2, 5, 10] + for batch_size in batch_sizes: + inputs = [f"This is test sentence {i}" for i in range(batch_size)] + results = embeddings.encode(inputs=inputs) + + assert ( + len(results) == batch_size + ), f"Expected {batch_size} results, got {len(results)}" + for result in results: + assert ( + len(result) == 384 + ), f"Expected embedding dimension 384, got {len(result)}" + + # Test with a large batch to ensure it doesn't cause issues + large_batch = ["Large batch test" for _ in range(100)] + large_results = embeddings.encode(inputs=large_batch) + assert ( + len(large_results) == 100 + ), f"Expected 100 results for large batch, got {len(large_results)}" + + def test_encode_batch_consistency(self, local_llamacpp_model_path) -> None: + """ + Test if the model produces consistent embeddings for the same input in different batch sizes. + + Args: + local_llamacpp_model_path (str): Fixture providing the local model path. + """ + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings.load() + + input_text = "This is a test sentence for consistency" + + # Generate embedding individually + single_result = embeddings.encode([input_text])[0] + + # Generate embedding as part of a batch + batch_result = embeddings.encode([input_text, "Another sentence"])[0] + + # Compare the embeddings + assert np.allclose( + single_result, batch_result, atol=1e-5 + ), "Embeddings are not consistent between single and batch processing" From 2d0aa76a3ad077e5422389c98188391554032127 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 26 Sep 2024 13:02:39 +0530 Subject: [PATCH 09/27] - Included LlamaCppEmbeddings to __ini__.py --- src/distilabel/embeddings/__init__.py | 2 ++ tests/unit/embeddings/test_llamacpp.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/distilabel/embeddings/__init__.py b/src/distilabel/embeddings/__init__.py index 190ea70e50..1b940d0230 100644 --- a/src/distilabel/embeddings/__init__.py +++ b/src/distilabel/embeddings/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. 
from distilabel.embeddings.base import Embeddings +from distilabel.embeddings.llamacpp import LlamaCppEmbeddings from distilabel.embeddings.sentence_transformers import SentenceTransformerEmbeddings from distilabel.embeddings.vllm import vLLMEmbeddings @@ -20,4 +21,5 @@ "Embeddings", "SentenceTransformerEmbeddings", "vLLMEmbeddings", + "LlamaCppEmbeddings", ] diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index f7cc9eb1af..248ce88fdc 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -14,7 +14,7 @@ import numpy as np -from distilabel.embeddings.llamacpp import LlamaCppEmbeddings +from distilabel.embeddings import LlamaCppEmbeddings class TestLlamaCppEmbeddings: From 778532f78bd8ebb3cd7e4705b4fb3ae0d2843af3 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Tue, 1 Oct 2024 03:55:17 +0530 Subject: [PATCH 10/27] - Use HF_TOKEN to download model from hub to generate embeddings. --- src/distilabel/embeddings/llamacpp.py | 56 ++++++++++++++++++++------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 39c1806f02..fc06417b4b 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile from typing import TYPE_CHECKING, Any, Dict, List, Union from pydantic import Field, PrivateAttr @@ -31,6 +33,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): model_path: contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings. repo_id: the Hugging Face Hub repository id. + hf_token: Hugging Face token for accessing gated models. verbose: whether to print verbose output. Defaults to `False`. n_gpu_layers: number of layers to run on the GPU. Defaults to `-1` (use the GPU if available). disable_cuda_device_placement: whether to disable CUDA device placement. Defaults to `True`. @@ -73,6 +76,10 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): default=None, description="The Hugging Face Hub repository id.", ) + hf_token: RuntimeParameter[Union[None, str]] = Field( + default=None, + description="Hugging Face token for accessing gated models.", + ) n_gpu_layers: int = 0 disable_cuda_device_placement: RuntimeParameter[bool] = Field( default=True, @@ -96,7 +103,11 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): _model: Union["_LlamaCpp", None] = PrivateAttr(None) def load(self) -> None: - """Loads the `gguf` model using either the path or the Hugging Face Hub repository id.""" + """ + Loads the `gguf` model using either the path or the Hugging Face Hub repository id. + If using Hugging Face Hub, the model will be downloaded to a local directory + specified by the DISTILABEL_MODEL_DIR environment variable or to a temporary directory. + """ super().load() CudaDevicePlacementMixin.load(self) @@ -111,34 +122,52 @@ def load(self) -> None: if self.repo_id is not None: try: + from huggingface_hub import hf_hub_download from huggingface_hub.utils import validate_repo_id - - validate_repo_id(self.repo_id) except ImportError as ie: raise ImportError( "Llama.from_pretrained requires the huggingface-hub package. " "You can install it with `pip install huggingface-hub`." 
) from ie + + validate_repo_id(self.repo_id) + + # Determine the download directory + download_dir = os.environ.get("DISTILABEL_MODEL_DIR") + if download_dir is None: + download_dir = tempfile.gettempdir() + + self._logger.info( + f"Attempting to download model from Hugging Face Hub: {self.repo_id}" + ) try: - self._logger.info( - f"Attempting to load model from Hugging Face Hub: {self.repo_id}" - ) - self._model = _LlamaCpp.from_pretrained( + model_path = hf_hub_download( repo_id=self.repo_id, filename=self.model_path, + token=self.hf_token, + local_dir=download_dir, + ) + self._logger.info(f"Model downloaded successfully to: {model_path}") + except Exception as e: + self._logger.error( + f"Failed to download model from Hugging Face Hub: {str(e)}" + ) + raise + + try: + self._model = _LlamaCpp( + model_path=model_path, n_gpu_layers=self.n_gpu_layers, seed=self.seed, n_ctx=self.n_ctx, n_batch=self.n_batch, verbose=self.verbose, embedding=True, - kwargs=self.extra_kwargs, + **self.extra_kwargs, ) - self._logger.info("Model loaded successfully from Hugging Face Hub") + self._logger.info("Model loaded successfully") except Exception as e: - self._logger.error( - f"Failed to load model from Hugging Face Hub: {str(e)}" - ) + self._logger.error(f"Failed to load model: {str(e)}") raise else: try: @@ -151,9 +180,8 @@ def load(self) -> None: n_batch=self.n_batch, verbose=self.verbose, embedding=True, - kwargs=self.extra_kwargs, + **self.extra_kwargs, ) - self._logger.info(f"self._model: {self._model}") self._logger.info("Model loaded successfully") except Exception as e: self._logger.error(f"Failed to load model: {str(e)}") From 55c3a0d8f34b8c7e206e0a20e3f09ccc5746a2a7 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Wed, 2 Oct 2024 16:43:21 +0530 Subject: [PATCH 11/27] - Download from hub is now available through mixin --- src/distilabel/embeddings/llamacpp.py | 102 ++++++------------------ src/distilabel/mixins/hub_downloader.py | 89 +++++++++++++++++++++ tests/unit/embeddings/test_llamacpp.py | 18 ++--- 3 files changed, 121 insertions(+), 88 deletions(-) create mode 100644 src/distilabel/mixins/hub_downloader.py diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index fc06417b4b..22f12bd024 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import tempfile from typing import TYPE_CHECKING, Any, Dict, List, Union from pydantic import Field, PrivateAttr from distilabel.embeddings.base import Embeddings from distilabel.llms.mixins.cuda_device_placement import CudaDevicePlacementMixin +from distilabel.mixins.hub_downloader import HuggingFaceModelLoaderMixin from distilabel.mixins.runtime_parameters import RuntimeParameter if TYPE_CHECKING: from llama_cpp import Llama as _LlamaCpp -class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): +class LlamaCppEmbeddings( + Embeddings, CudaDevicePlacementMixin, HuggingFaceModelLoaderMixin +): """`LlamaCpp` library implementation for embedding generation. 
Attributes: @@ -71,16 +72,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ``` """ - model_path: str - repo_id: RuntimeParameter[Union[None, str]] = Field( - default=None, - description="The Hugging Face Hub repository id.", - ) - hf_token: RuntimeParameter[Union[None, str]] = Field( - default=None, - description="Hugging Face token for accessing gated models.", - ) - n_gpu_layers: int = 0 + model_file: str + n_gpu_layers: RuntimeParameter[int] = Field(default=0, description="Numbe of gpu") disable_cuda_device_placement: RuntimeParameter[bool] = Field( default=True, description="Whether to disable CUDA device placement.", @@ -120,72 +113,23 @@ def load(self) -> None: " `pip install llama-cpp-python`." ) from ie - if self.repo_id is not None: - try: - from huggingface_hub import hf_hub_download - from huggingface_hub.utils import validate_repo_id - except ImportError as ie: - raise ImportError( - "Llama.from_pretrained requires the huggingface-hub package. " - "You can install it with `pip install huggingface-hub`." - ) from ie - - validate_repo_id(self.repo_id) - - # Determine the download directory - download_dir = os.environ.get("DISTILABEL_MODEL_DIR") - if download_dir is None: - download_dir = tempfile.gettempdir() - - self._logger.info( - f"Attempting to download model from Hugging Face Hub: {self.repo_id}" + model_path = self.download_model() + try: + self._logger.info(f"Attempting to load model from: {self.model_file}") + self._model = _LlamaCpp( + model_path=model_path, + seed=self.seed, + n_gpu_layers=self.n_gpu_layers, + n_ctx=self.n_ctx, + n_batch=self.n_batch, + verbose=self.verbose, + embedding=True, + **self.extra_kwargs, ) - try: - model_path = hf_hub_download( - repo_id=self.repo_id, - filename=self.model_path, - token=self.hf_token, - local_dir=download_dir, - ) - self._logger.info(f"Model downloaded successfully to: {model_path}") - except Exception as e: - self._logger.error( - f"Failed to download model from Hugging Face Hub: {str(e)}" - ) - raise - - try: - self._model = _LlamaCpp( - model_path=model_path, - n_gpu_layers=self.n_gpu_layers, - seed=self.seed, - n_ctx=self.n_ctx, - n_batch=self.n_batch, - verbose=self.verbose, - embedding=True, - **self.extra_kwargs, - ) - self._logger.info("Model loaded successfully") - except Exception as e: - self._logger.error(f"Failed to load model: {str(e)}") - raise - else: - try: - self._logger.info(f"Attempting to load model from: {self.model_path}") - self._model = _LlamaCpp( - model_path=self.model_path, - seed=self.seed, - n_gpu_layers=self.n_gpu_layers, - n_ctx=self.n_ctx, - n_batch=self.n_batch, - verbose=self.verbose, - embedding=True, - **self.extra_kwargs, - ) - self._logger.info("Model loaded successfully") - except Exception as e: - self._logger.error(f"Failed to load model: {str(e)}") - raise + self._logger.info("Model loaded successfully") + except Exception as e: + self._logger.error(f"Failed to load model: {str(e)}") + raise def unload(self) -> None: """Unloads the `gguf` model.""" @@ -195,7 +139,7 @@ def unload(self) -> None: @property def model_name(self) -> str: """Returns the name of the model.""" - return self.model_path + return self.model_file def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: """Generates embeddings for the provided inputs. 
diff --git a/src/distilabel/mixins/hub_downloader.py b/src/distilabel/mixins/hub_downloader.py new file mode 100644 index 0000000000..f58986fc56 --- /dev/null +++ b/src/distilabel/mixins/hub_downloader.py @@ -0,0 +1,89 @@ +# Copyright 2023-present, Argilla, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +from typing import Optional + +from pydantic import BaseModel, Field + + +class HuggingFaceModelLoaderMixin(BaseModel): + """ + A mixin for downloading models from the Hugging Face Hub. + + Attributes: + repo_id (Optional[str]): The Hugging Face Hub repository id. + model_file (str): The name of the model file to download. + hf_token (Optional[str]): Hugging Face token for accessing gated models. + """ + + repo_id: Optional[str] = Field( + default=None, + description="The Hugging Face Hub repository id.", + ) + model_file: str = Field( + description="The name of the model file to download.", + ) + hf_token: Optional[str] = Field( + default=None, + description="Hugging Face token for accessing gated models.", + ) + + def download_model(self) -> str: + """ + Downloads the model from Hugging Face Hub if repo_id is provided. + + Returns: + str: The path to the downloaded or local model file. + + Raises: + ImportError: If huggingface_hub is not installed. + ValueError: If repo_id is not provided or invalid. + Exception: If there's an error downloading or loading the model. + """ + if self.repo_id is None: + return self.model_file + + try: + from huggingface_hub import hf_hub_download + from huggingface_hub.utils import validate_repo_id + except ImportError as ie: + raise ImportError( + "huggingface_hub package is not installed. " + "You can install it with `pip install huggingface_hub`." + ) from ie + + try: + validate_repo_id(self.repo_id) + except ValueError as ve: + raise ValueError(f"Invalid repo_id: {self.repo_id}") from ve + + # Determine the download directory + download_dir = os.environ.get("DISTILABEL_MODEL_DIR") + if download_dir is None: + download_dir = tempfile.gettempdir() + + try: + model_path = hf_hub_download( + repo_id=self.repo_id, + filename=self.model_file, + token=self.hf_token, + local_dir=download_dir, + ) + return model_path + except Exception as e: + raise Exception( + f"Failed to download model from Hugging Face Hub: {str(e)}" + ) from e diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 248ce88fdc..403250a352 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -25,8 +25,8 @@ def test_model_name(self) -> None: """ Test if the model name is correctly set. 
""" - embeddings = LlamaCppEmbeddings(model_path=self.model_name) - assert embeddings.model_path == self.model_name + embeddings = LlamaCppEmbeddings(model_file=self.model_name) + assert embeddings.model_file == self.model_name def test_encode(self, local_llamacpp_model_path) -> None: """ @@ -35,7 +35,7 @@ def test_encode(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -54,7 +54,7 @@ def test_load_model_from_local(self, local_llamacpp_model_path): Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -73,7 +73,7 @@ def test_load_model_from_repo(self): """ embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, - model_path=self.model_name, + model_file=self.model_name, normalize_embeddings=True, ) inputs = [ @@ -94,7 +94,7 @@ def test_normalize_embeddings_true(self, local_llamacpp_model_path): Test if embeddings are normalized when normalize_embeddings is True. """ embeddings = LlamaCppEmbeddings( - model_path=local_llamacpp_model_path, normalize_embeddings=True + model_file=local_llamacpp_model_path, normalize_embeddings=True ) embeddings.load() @@ -118,7 +118,7 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): Test if embeddings are not normalized when normalize_embeddings is False. """ embeddings = LlamaCppEmbeddings( - model_path=local_llamacpp_model_path, normalize_embeddings=False + model_file=local_llamacpp_model_path, normalize_embeddings=False ) embeddings.load() @@ -150,7 +150,7 @@ def test_encode_batch(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) embeddings.load() # Test with different batch sizes @@ -181,7 +181,7 @@ def test_encode_batch_consistency(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) embeddings.load() input_text = "This is a test sentence for consistency" From 935cdb8f484d3ce12c97ba4e28ab4cd0c797a2b2 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 3 Oct 2024 15:43:28 +0530 Subject: [PATCH 12/27] Revert "- Download from hub is now available through mixin" This reverts commit 55c3a0d8f34b8c7e206e0a20e3f09ccc5746a2a7. 
--- src/distilabel/embeddings/llamacpp.py | 102 ++++++++++++++++++------ src/distilabel/mixins/hub_downloader.py | 89 --------------------- tests/unit/embeddings/test_llamacpp.py | 18 ++--- 3 files changed, 88 insertions(+), 121 deletions(-) delete mode 100644 src/distilabel/mixins/hub_downloader.py diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 22f12bd024..fc06417b4b 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,22 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import tempfile from typing import TYPE_CHECKING, Any, Dict, List, Union from pydantic import Field, PrivateAttr from distilabel.embeddings.base import Embeddings from distilabel.llms.mixins.cuda_device_placement import CudaDevicePlacementMixin -from distilabel.mixins.hub_downloader import HuggingFaceModelLoaderMixin from distilabel.mixins.runtime_parameters import RuntimeParameter if TYPE_CHECKING: from llama_cpp import Llama as _LlamaCpp -class LlamaCppEmbeddings( - Embeddings, CudaDevicePlacementMixin, HuggingFaceModelLoaderMixin -): +class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """`LlamaCpp` library implementation for embedding generation. Attributes: @@ -72,8 +71,16 @@ class LlamaCppEmbeddings( ``` """ - model_file: str - n_gpu_layers: RuntimeParameter[int] = Field(default=0, description="Numbe of gpu") + model_path: str + repo_id: RuntimeParameter[Union[None, str]] = Field( + default=None, + description="The Hugging Face Hub repository id.", + ) + hf_token: RuntimeParameter[Union[None, str]] = Field( + default=None, + description="Hugging Face token for accessing gated models.", + ) + n_gpu_layers: int = 0 disable_cuda_device_placement: RuntimeParameter[bool] = Field( default=True, description="Whether to disable CUDA device placement.", @@ -113,23 +120,72 @@ def load(self) -> None: " `pip install llama-cpp-python`." ) from ie - model_path = self.download_model() - try: - self._logger.info(f"Attempting to load model from: {self.model_file}") - self._model = _LlamaCpp( - model_path=model_path, - seed=self.seed, - n_gpu_layers=self.n_gpu_layers, - n_ctx=self.n_ctx, - n_batch=self.n_batch, - verbose=self.verbose, - embedding=True, - **self.extra_kwargs, + if self.repo_id is not None: + try: + from huggingface_hub import hf_hub_download + from huggingface_hub.utils import validate_repo_id + except ImportError as ie: + raise ImportError( + "Llama.from_pretrained requires the huggingface-hub package. " + "You can install it with `pip install huggingface-hub`." 
+ ) from ie + + validate_repo_id(self.repo_id) + + # Determine the download directory + download_dir = os.environ.get("DISTILABEL_MODEL_DIR") + if download_dir is None: + download_dir = tempfile.gettempdir() + + self._logger.info( + f"Attempting to download model from Hugging Face Hub: {self.repo_id}" ) - self._logger.info("Model loaded successfully") - except Exception as e: - self._logger.error(f"Failed to load model: {str(e)}") - raise + try: + model_path = hf_hub_download( + repo_id=self.repo_id, + filename=self.model_path, + token=self.hf_token, + local_dir=download_dir, + ) + self._logger.info(f"Model downloaded successfully to: {model_path}") + except Exception as e: + self._logger.error( + f"Failed to download model from Hugging Face Hub: {str(e)}" + ) + raise + + try: + self._model = _LlamaCpp( + model_path=model_path, + n_gpu_layers=self.n_gpu_layers, + seed=self.seed, + n_ctx=self.n_ctx, + n_batch=self.n_batch, + verbose=self.verbose, + embedding=True, + **self.extra_kwargs, + ) + self._logger.info("Model loaded successfully") + except Exception as e: + self._logger.error(f"Failed to load model: {str(e)}") + raise + else: + try: + self._logger.info(f"Attempting to load model from: {self.model_path}") + self._model = _LlamaCpp( + model_path=self.model_path, + seed=self.seed, + n_gpu_layers=self.n_gpu_layers, + n_ctx=self.n_ctx, + n_batch=self.n_batch, + verbose=self.verbose, + embedding=True, + **self.extra_kwargs, + ) + self._logger.info("Model loaded successfully") + except Exception as e: + self._logger.error(f"Failed to load model: {str(e)}") + raise def unload(self) -> None: """Unloads the `gguf` model.""" @@ -139,7 +195,7 @@ def unload(self) -> None: @property def model_name(self) -> str: """Returns the name of the model.""" - return self.model_file + return self.model_path def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: """Generates embeddings for the provided inputs. diff --git a/src/distilabel/mixins/hub_downloader.py b/src/distilabel/mixins/hub_downloader.py deleted file mode 100644 index f58986fc56..0000000000 --- a/src/distilabel/mixins/hub_downloader.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2023-present, Argilla, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import tempfile -from typing import Optional - -from pydantic import BaseModel, Field - - -class HuggingFaceModelLoaderMixin(BaseModel): - """ - A mixin for downloading models from the Hugging Face Hub. - - Attributes: - repo_id (Optional[str]): The Hugging Face Hub repository id. - model_file (str): The name of the model file to download. - hf_token (Optional[str]): Hugging Face token for accessing gated models. 
- """ - - repo_id: Optional[str] = Field( - default=None, - description="The Hugging Face Hub repository id.", - ) - model_file: str = Field( - description="The name of the model file to download.", - ) - hf_token: Optional[str] = Field( - default=None, - description="Hugging Face token for accessing gated models.", - ) - - def download_model(self) -> str: - """ - Downloads the model from Hugging Face Hub if repo_id is provided. - - Returns: - str: The path to the downloaded or local model file. - - Raises: - ImportError: If huggingface_hub is not installed. - ValueError: If repo_id is not provided or invalid. - Exception: If there's an error downloading or loading the model. - """ - if self.repo_id is None: - return self.model_file - - try: - from huggingface_hub import hf_hub_download - from huggingface_hub.utils import validate_repo_id - except ImportError as ie: - raise ImportError( - "huggingface_hub package is not installed. " - "You can install it with `pip install huggingface_hub`." - ) from ie - - try: - validate_repo_id(self.repo_id) - except ValueError as ve: - raise ValueError(f"Invalid repo_id: {self.repo_id}") from ve - - # Determine the download directory - download_dir = os.environ.get("DISTILABEL_MODEL_DIR") - if download_dir is None: - download_dir = tempfile.gettempdir() - - try: - model_path = hf_hub_download( - repo_id=self.repo_id, - filename=self.model_file, - token=self.hf_token, - local_dir=download_dir, - ) - return model_path - except Exception as e: - raise Exception( - f"Failed to download model from Hugging Face Hub: {str(e)}" - ) from e diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 403250a352..248ce88fdc 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -25,8 +25,8 @@ def test_model_name(self) -> None: """ Test if the model name is correctly set. """ - embeddings = LlamaCppEmbeddings(model_file=self.model_name) - assert embeddings.model_file == self.model_name + embeddings = LlamaCppEmbeddings(model_path=self.model_name) + assert embeddings.model_path == self.model_name def test_encode(self, local_llamacpp_model_path) -> None: """ @@ -35,7 +35,7 @@ def test_encode(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -54,7 +54,7 @@ def test_load_model_from_local(self, local_llamacpp_model_path): Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", @@ -73,7 +73,7 @@ def test_load_model_from_repo(self): """ embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, - model_file=self.model_name, + model_path=self.model_name, normalize_embeddings=True, ) inputs = [ @@ -94,7 +94,7 @@ def test_normalize_embeddings_true(self, local_llamacpp_model_path): Test if embeddings are normalized when normalize_embeddings is True. 
""" embeddings = LlamaCppEmbeddings( - model_file=local_llamacpp_model_path, normalize_embeddings=True + model_path=local_llamacpp_model_path, normalize_embeddings=True ) embeddings.load() @@ -118,7 +118,7 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): Test if embeddings are not normalized when normalize_embeddings is False. """ embeddings = LlamaCppEmbeddings( - model_file=local_llamacpp_model_path, normalize_embeddings=False + model_path=local_llamacpp_model_path, normalize_embeddings=False ) embeddings.load() @@ -150,7 +150,7 @@ def test_encode_batch(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) embeddings.load() # Test with different batch sizes @@ -181,7 +181,7 @@ def test_encode_batch_consistency(self, local_llamacpp_model_path) -> None: Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_file=local_llamacpp_model_path) + embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) embeddings.load() input_text = "This is a test sentence for consistency" From 29a8d56817193c827596ad46246fd41d928df871 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 3 Oct 2024 15:50:31 +0530 Subject: [PATCH 13/27] Revert "- Use HF_TOKEN to download model from hub to generate embeddings." This reverts commit 778532f78bd8ebb3cd7e4705b4fb3ae0d2843af3. HF_TOKEN can be set as env variable to download gated model --- src/distilabel/embeddings/llamacpp.py | 55 ++++----------------------- 1 file changed, 8 insertions(+), 47 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index fc06417b4b..53c9d550d8 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import tempfile from typing import TYPE_CHECKING, Any, Dict, List, Union from pydantic import Field, PrivateAttr @@ -33,7 +31,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): model_path: contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings. repo_id: the Hugging Face Hub repository id. - hf_token: Hugging Face token for accessing gated models. verbose: whether to print verbose output. Defaults to `False`. n_gpu_layers: number of layers to run on the GPU. Defaults to `-1` (use the GPU if available). disable_cuda_device_placement: whether to disable CUDA device placement. Defaults to `True`. @@ -76,10 +73,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): default=None, description="The Hugging Face Hub repository id.", ) - hf_token: RuntimeParameter[Union[None, str]] = Field( - default=None, - description="Hugging Face token for accessing gated models.", - ) n_gpu_layers: int = 0 disable_cuda_device_placement: RuntimeParameter[bool] = Field( default=True, @@ -103,11 +96,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): _model: Union["_LlamaCpp", None] = PrivateAttr(None) def load(self) -> None: - """ - Loads the `gguf` model using either the path or the Hugging Face Hub repository id. 
- If using Hugging Face Hub, the model will be downloaded to a local directory - specified by the DISTILABEL_MODEL_DIR environment variable or to a temporary directory. - """ + """Loads the `gguf` model using either the path or the Hugging Face Hub repository id.""" super().load() CudaDevicePlacementMixin.load(self) @@ -122,56 +111,30 @@ def load(self) -> None: if self.repo_id is not None: try: - from huggingface_hub import hf_hub_download from huggingface_hub.utils import validate_repo_id + + validate_repo_id(self.repo_id) except ImportError as ie: raise ImportError( "Llama.from_pretrained requires the huggingface-hub package. " "You can install it with `pip install huggingface-hub`." ) from ie - - validate_repo_id(self.repo_id) - - # Determine the download directory - download_dir = os.environ.get("DISTILABEL_MODEL_DIR") - if download_dir is None: - download_dir = tempfile.gettempdir() - - self._logger.info( - f"Attempting to download model from Hugging Face Hub: {self.repo_id}" - ) try: - model_path = hf_hub_download( + self._model = _LlamaCpp.from_pretrained( repo_id=self.repo_id, filename=self.model_path, - token=self.hf_token, - local_dir=download_dir, - ) - self._logger.info(f"Model downloaded successfully to: {model_path}") - except Exception as e: - self._logger.error( - f"Failed to download model from Hugging Face Hub: {str(e)}" - ) - raise - - try: - self._model = _LlamaCpp( - model_path=model_path, n_gpu_layers=self.n_gpu_layers, seed=self.seed, n_ctx=self.n_ctx, n_batch=self.n_batch, verbose=self.verbose, embedding=True, - **self.extra_kwargs, + kwargs=self.extra_kwargs, ) - self._logger.info("Model loaded successfully") - except Exception as e: - self._logger.error(f"Failed to load model: {str(e)}") + except Exception: raise else: try: - self._logger.info(f"Attempting to load model from: {self.model_path}") self._model = _LlamaCpp( model_path=self.model_path, seed=self.seed, @@ -180,11 +143,9 @@ def load(self) -> None: n_batch=self.n_batch, verbose=self.verbose, embedding=True, - **self.extra_kwargs, + kwargs=self.extra_kwargs, ) - self._logger.info("Model loaded successfully") - except Exception as e: - self._logger.error(f"Failed to load model: {str(e)}") + except Exception: raise def unload(self) -> None: From b40b0d267b031a670c480739656e6a1ce071b50a Mon Sep 17 00:00:00 2001 From: bikash119 Date: Thu, 3 Oct 2024 20:40:46 +0530 Subject: [PATCH 14/27] - Removed mixin implemenation to download the model - alligned the attribute as per the review comments. --- src/distilabel/embeddings/llamacpp.py | 93 ++++++++++++++++++-------- tests/unit/embeddings/test_llamacpp.py | 11 +-- 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 53c9d550d8..e90ea24d45 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Any, Dict, List, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from pydantic import Field, PrivateAttr @@ -21,7 +21,7 @@ from distilabel.mixins.runtime_parameters import RuntimeParameter if TYPE_CHECKING: - from llama_cpp import Llama as _LlamaCpp + from llama_cpp import Llama class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): @@ -43,20 +43,51 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): _model: the `Llama` model instance. 
This attribute is meant to be used internally and should not be accessed directly. It will be set in the `load` method. + Runtime parameters: + - `n_gpu_layers`: the number of layers to use for the GPU. Defaults to `-1`. + - `verbose`: whether to print verbose output. Defaults to `False`. + - `normalize_embeddings`: whether to normalize the embeddings. Defaults to `False`. + - `extra_kwargs`: additional dictionary of keyword arguments that will be passed to the + `Llama` class of `llama_cpp` library. Defaults to `{}`. References: - [Offline inference embeddings](https://llama-cpp-python.readthedocs.io/en/stable/#embeddings) Examples: - Generating sentence embeddings: + Generating sentence embeddings using a local model: ```python + from pathlib import Path from distilabel.embeddings import LlamaCppEmbeddings - embeddings = LlamaCppEmbeddings(model="/path/to/model.gguf") + # You can follow along this example downloading the following model running the following + # command in the terminal, that will download the model to the `Downloads` folder: + # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf + + model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" + embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / model_path)) + + embeddings.load() + + results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + # [ + # [-0.05447685346007347, -0.01623094454407692, ...], + # [4.4889533455716446e-05, 0.044016145169734955, ...], + # ] + ``` + + Generating sentence embeddings using a Hugging Face Hub model: - ## Hugging Face Hub + ```python + from pathlib import Path + from distilabel.embeddings import LlamaCppEmbeddings - ## embeddings = LlamaCppEmbeddings(repo_id="second-state/All-MiniLM-L6-v2-Embedding-GGUF", model="all-MiniLM-L6-v2-Q2_K.gguf") + # You can follow along this example downloading the following model running the following + # command in the terminal, that will download the model to the `Downloads` folder: + # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf + + repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" + model_path = "all-MiniLM-L6-v2-Q5_K_M.gguf" + embeddings = LlamaCppEmbeddings(repo_id=repo_id,model_path=model_path) embeddings.load() @@ -69,40 +100,44 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """ model_path: str - repo_id: RuntimeParameter[Union[None, str]] = Field( - default=None, - description="The Hugging Face Hub repository id.", - ) - n_gpu_layers: int = 0 - disable_cuda_device_placement: RuntimeParameter[bool] = Field( - default=True, - description="Whether to disable CUDA device placement.", + + repo_id: RuntimeParameter[str] = Field( + default=None, description="The Hugging Face Hub repository id.", exclude=True ) - verbose: RuntimeParameter[bool] = Field( - default=False, - description="Whether to print verbose output from llama.cpp library.", + + n_gpu_layers: RuntimeParameter[int] = Field( + default=0, + description="The number of layers that will be loaded in the GPU.", ) + + n_ctx: int = 512 + n_batch: int = 512 + seed: int = 4294967295 + normalize_embeddings: RuntimeParameter[bool] = Field( default=False, description="Whether to normalize the embeddings.", ) - seed: int = 4294967295 - n_ctx: int = 512 - n_batch: int = 512 - extra_kwargs: RuntimeParameter[Dict[str, Any]] = Field( - 
default={}, - description="Additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library.", + verbose: RuntimeParameter[bool] = Field( + default=False, + description="Whether to print verbose output from llama.cpp library.", ) - _model: Union["_LlamaCpp", None] = PrivateAttr(None) + extra_kwargs: Optional[RuntimeParameter[Dict[str, Any]]] = Field( + default_factory=dict, + description="Additional dictionary of keyword arguments that will be passed to the" + " `Llama` class of `llama_cpp` library. See all the supported arguments at: " + "https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__init__", + ) + _model: Optional["Llama"] = PrivateAttr(...) def load(self) -> None: """Loads the `gguf` model using either the path or the Hugging Face Hub repository id.""" super().load() - + self.disable_cuda_device_placement = True CudaDevicePlacementMixin.load(self) try: - from llama_cpp import Llama as _LlamaCpp + from llama_cpp import Llama except ImportError as ie: raise ImportError( "`llama-cpp-python` package is not installed. Please install it using" @@ -120,7 +155,7 @@ def load(self) -> None: "You can install it with `pip install huggingface-hub`." ) from ie try: - self._model = _LlamaCpp.from_pretrained( + self._model = Llama.from_pretrained( repo_id=self.repo_id, filename=self.model_path, n_gpu_layers=self.n_gpu_layers, @@ -135,10 +170,10 @@ def load(self) -> None: raise else: try: - self._model = _LlamaCpp( + self._model = Llama( model_path=self.model_path, - seed=self.seed, n_gpu_layers=self.n_gpu_layers, + seed=self.seed, n_ctx=self.n_ctx, n_batch=self.n_batch, verbose=self.verbose, diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 248ce88fdc..10885304aa 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -12,21 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +from pathlib import Path + import numpy as np from distilabel.embeddings import LlamaCppEmbeddings class TestLlamaCppEmbeddings: - model_name = "all-MiniLM-L6-v2-Q2_K.gguf" + model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" + hub_model = "all-MiniLM-L6-v2-Q5_K_M.gguf" def test_model_name(self) -> None: """ Test if the model name is correctly set. 
""" - embeddings = LlamaCppEmbeddings(model_path=self.model_name) - assert embeddings.model_path == self.model_name + embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / self.model_path)) + assert embeddings.model_name == str(Path.home() / self.model_path) def test_encode(self, local_llamacpp_model_path) -> None: """ @@ -73,8 +76,8 @@ def test_load_model_from_repo(self): """ embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, - model_path=self.model_name, normalize_embeddings=True, + model_path=self.hub_model, ) inputs = [ "Hello, how are you?", From b08f3aed05ca2baf51cec3d1ee4c25055ba2e9a9 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 08:10:16 +0530 Subject: [PATCH 15/27] - Additional example added for private / public model --- src/distilabel/embeddings/llamacpp.py | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index e90ea24d45..2dc640eaba 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -53,7 +53,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): - [Offline inference embeddings](https://llama-cpp-python.readthedocs.io/en/stable/#embeddings) Examples: - Generating sentence embeddings using a local model: + Generate sentence embeddings using a local model: ```python from pathlib import Path @@ -75,16 +75,12 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # ] ``` - Generating sentence embeddings using a Hugging Face Hub model: + Generate sentence embeddings using a HuggingFace Hub public model: ```python from pathlib import Path from distilabel.embeddings import LlamaCppEmbeddings - # You can follow along this example downloading the following model running the following - # command in the terminal, that will download the model to the `Downloads` folder: - # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf - repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" model_path = "all-MiniLM-L6-v2-Q5_K_M.gguf" embeddings = LlamaCppEmbeddings(repo_id=repo_id,model_path=model_path) @@ -97,6 +93,31 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # [4.4889533455716446e-05, 0.044016145169734955, ...], # ] ``` + + Generate sentence embeddings using a HuggingFace Hub private model: + + ```python + from pathlib import Path + from distilabel.embeddings import LlamaCppEmbeddings + + # You need to set environment variable to download private model to the local machine + os.environ["HF_TOKEN"] = "hf_..." + + repo_id = "private_repo_id" + model_path = "model" + embeddings = LlamaCppEmbeddings(repo_id=repo_id,model_path=model_path) + + embeddings.load() + + results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + # [ + # [-0.05447685346007347, -0.01623094454407692, ...], + # [4.4889533455716446e-05, 0.044016145169734955, ...], + # ] + ``` + + + """ model_path: str From a49363cb39108724803ac40d2251702f1fc6e093 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 09:50:19 +0530 Subject: [PATCH 16/27] - The tests can now be configured to use cpu or gpu based on parameter --cpu-only. 
`pytest tests/unit/embeddings/test_llamacpp.py --cpu-only` will generate embeddings using cpu `pytest tests/unit/embeddings/test_llamacpp.py` will generate embeddings using gpu --- src/distilabel/embeddings/llamacpp.py | 22 ++++++- tests/unit/conftest.py | 17 +++++ tests/unit/embeddings/test_llamacpp.py | 90 +++++++++++++------------- 3 files changed, 84 insertions(+), 45 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 2dc640eaba..4bf12ed2ed 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -115,7 +115,27 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # [4.4889533455716446e-05, 0.044016145169734955, ...], # ] ``` + Generate sentence embeddings with cpu: + ```python + from pathlib import Path + from distilabel.embeddings import LlamaCppEmbeddings + + # You can follow along this example downloading the following model running the following + # command in the terminal, that will download the model to the `Downloads` folder: + # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf + + model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" + embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / model_path), n_gpu_layers=0) + + embeddings.load() + + results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + # [ + # [-0.05447685346007347, -0.01623094454407692, ...], + # [4.4889533455716446e-05, 0.044016145169734955, ...], + # ] + ``` """ @@ -127,7 +147,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ) n_gpu_layers: RuntimeParameter[int] = Field( - default=0, + default=-1, description="The number of layers that will be loaded in the GPU.", ) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8425801499..3d4d86c875 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -167,3 +167,20 @@ def cleanup(): atexit.register(cleanup) return str(model_path) + + +def pytest_addoption(parser): + """ + Add a command-line option to pytest for CPU-only testing. + """ + parser.addoption( + "--cpu-only", action="store", default=False, help="Run tests on CPU only" + ) + + +@pytest.fixture +def use_cpu(request): + """ + Fixture to determine whether to use CPU based on command-line option. + """ + return request.config.getoption("--cpu-only") diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 10885304aa..4b426ae3bc 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -12,72 +12,85 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pathlib import Path import numpy as np +import pytest from distilabel.embeddings import LlamaCppEmbeddings +""" +To test with CPU only, run the following command: +pytest tests/unit/embeddings/test_llamacpp.py --cpu-only + +""" + class TestLlamaCppEmbeddings: model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" hub_model = "all-MiniLM-L6-v2-Q5_K_M.gguf" - def test_model_name(self) -> None: + @pytest.fixture(autouse=True) + def setup_embeddings(self, local_llamacpp_model_path, use_cpu): + """ + Fixture to set up embeddings for each test, considering CPU usage. 
+ """ + n_gpu_layers = 0 if use_cpu else -1 + self.embeddings = LlamaCppEmbeddings( + model_path=local_llamacpp_model_path, n_gpu_layers=n_gpu_layers + ) + self.embeddings.load() + + def test_model_name(self, local_llamacpp_model_path) -> None: """ Test if the model name is correctly set. """ - embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / self.model_path)) - assert embeddings.model_name == str(Path.home() / self.model_path) + assert self.embeddings.model_name == local_llamacpp_model_path - def test_encode(self, local_llamacpp_model_path) -> None: + def test_encode(self) -> None: """ Test if the model can generate embeddings. - - Args: - local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) inputs = [ "Hello, how are you?", "What a nice day!", "I hear that llamas are very popular now.", ] - embeddings.load() - results = embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=inputs) for result in results: assert len(result) == 384 - def test_load_model_from_local(self, local_llamacpp_model_path): + def test_load_model_from_local(self): """ Test if the model can be loaded from a local file and generate embeddings. Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) + inputs = [ "Hello, how are you?", "What a nice day!", "I hear that llamas are very popular now.", ] - embeddings.load() + # Test if the model is loaded by generating an embedding - results = embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=inputs) for result in results: assert len(result) == 384 - def test_load_model_from_repo(self): + def test_load_model_from_repo(self, use_cpu): """ Test if the model can be loaded from a Hugging Face repository. """ + n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, normalize_embeddings=True, model_path=self.hub_model, + n_gpu_layers=n_gpu_layers, ) inputs = [ "Hello, how are you?", @@ -92,21 +105,23 @@ def test_load_model_from_repo(self): for result in results: assert len(result) == 384 - def test_normalize_embeddings_true(self, local_llamacpp_model_path): + def test_normalize_embeddings(self, use_cpu): """ Test if embeddings are normalized when normalize_embeddings is True. """ - embeddings = LlamaCppEmbeddings( - model_path=local_llamacpp_model_path, normalize_embeddings=True - ) - embeddings.load() - inputs = [ "Hello, how are you?", "What a nice day!", "I hear that llamas are very popular now.", ] - + n_gpu_layers = 0 if use_cpu else -1 + embeddings = LlamaCppEmbeddings( + repo_id=self.repo_id, + normalize_embeddings=True, + model_path=self.hub_model, + n_gpu_layers=n_gpu_layers, + ) + embeddings.load() results = embeddings.encode(inputs=inputs) for result in results: @@ -116,14 +131,10 @@ def test_normalize_embeddings_true(self, local_llamacpp_model_path): norm, 1.0, atol=1e-6 ), f"Norm is {norm}, expected close to 1.0" - def test_normalize_embeddings_false(self, local_llamacpp_model_path): + def test_normalize_embeddings_false(self): """ Test if embeddings are not normalized when normalize_embeddings is False. 
""" - embeddings = LlamaCppEmbeddings( - model_path=local_llamacpp_model_path, normalize_embeddings=False - ) - embeddings.load() inputs = [ "Hello, how are you?", @@ -131,7 +142,7 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): "I hear that llamas are very popular now.", ] - results = embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=inputs) for result in results: # Check if the embedding is not normalized (L2 norm should not be close to 1) @@ -146,21 +157,15 @@ def test_normalize_embeddings_false(self, local_llamacpp_model_path): not np.isclose(norm, 1.0, atol=0.1) for norm in norms ), "Expected at least one embedding with norm not close to 1.0" - def test_encode_batch(self, local_llamacpp_model_path) -> None: + def test_encode_batch(self) -> None: """ Test if the model can generate embeddings for batches of inputs. - - Args: - local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) - embeddings.load() - # Test with different batch sizes batch_sizes = [1, 2, 5, 10] for batch_size in batch_sizes: inputs = [f"This is test sentence {i}" for i in range(batch_size)] - results = embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=inputs) assert ( len(results) == batch_size @@ -172,28 +177,25 @@ def test_encode_batch(self, local_llamacpp_model_path) -> None: # Test with a large batch to ensure it doesn't cause issues large_batch = ["Large batch test" for _ in range(100)] - large_results = embeddings.encode(inputs=large_batch) + large_results = self.embeddings.encode(inputs=large_batch) assert ( len(large_results) == 100 ), f"Expected 100 results for large batch, got {len(large_results)}" - def test_encode_batch_consistency(self, local_llamacpp_model_path) -> None: + def test_encode_batch_consistency(self) -> None: """ Test if the model produces consistent embeddings for the same input in different batch sizes. Args: local_llamacpp_model_path (str): Fixture providing the local model path. """ - embeddings = LlamaCppEmbeddings(model_path=local_llamacpp_model_path) - embeddings.load() - input_text = "This is a test sentence for consistency" # Generate embedding individually - single_result = embeddings.encode([input_text])[0] + single_result = self.embeddings.encode([input_text])[0] # Generate embedding as part of a batch - batch_result = embeddings.encode([input_text, "Another sentence"])[0] + batch_result = self.embeddings.encode([input_text, "Another sentence"])[0] # Compare the embeddings assert np.allclose( From 575f48e2909edec9d642601304268d958dc96b8f Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 18:31:46 +0530 Subject: [PATCH 17/27] - repo_id or model_path : one of the parameters is mandatory - model (name of the model) : the model used to generate embeddings. --- src/distilabel/embeddings/llamacpp.py | 65 ++++++++++++++------------ tests/unit/conftest.py | 6 +-- tests/unit/embeddings/test_llamacpp.py | 19 ++++---- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 4bf12ed2ed..b264473590 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from pydantic import Field, PrivateAttr @@ -28,6 +29,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """`LlamaCpp` library implementation for embedding generation. Attributes: + model_name: contains the name of the GGUF quantized model, compatible with the + installed version of the `llama.cpp` Python bindings. model_path: contains the path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings. repo_id: the Hugging Face Hub repository id. @@ -63,8 +66,9 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # command in the terminal, that will download the model to the `Downloads` folder: # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf - model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" - embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / model_path)) + model_path = "Downloads/" + model_name = "all-MiniLM-L6-v2-Q2_K.gguf" + embeddings = LlamaCppEmbeddings(model_name=model_name,model_path=str(Path.home() / model_path)) embeddings.load() @@ -82,8 +86,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): from distilabel.embeddings import LlamaCppEmbeddings repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" - model_path = "all-MiniLM-L6-v2-Q5_K_M.gguf" - embeddings = LlamaCppEmbeddings(repo_id=repo_id,model_path=model_path) + model_name = "all-MiniLM-L6-v2-Q5_K_M.gguf" + embeddings = LlamaCppEmbeddings(model_name=model_name,repo_id=repo_id) embeddings.load() @@ -140,7 +144,12 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """ - model_path: str + model: str + + model_path: RuntimeParameter[str] = Field( + default=None, + description="The path to the GGUF quantized model, compatible with the installed version of the `llama.cpp` Python bindings.", + ) repo_id: RuntimeParameter[str] = Field( default=None, description="The Hugging Face Hub repository id.", exclude=True @@ -186,33 +195,25 @@ def load(self) -> None: ) from ie if self.repo_id is not None: - try: - from huggingface_hub.utils import validate_repo_id - - validate_repo_id(self.repo_id) - except ImportError as ie: - raise ImportError( - "Llama.from_pretrained requires the huggingface-hub package. " - "You can install it with `pip install huggingface-hub`." 
- ) from ie - try: - self._model = Llama.from_pretrained( - repo_id=self.repo_id, - filename=self.model_path, - n_gpu_layers=self.n_gpu_layers, - seed=self.seed, - n_ctx=self.n_ctx, - n_batch=self.n_batch, - verbose=self.verbose, - embedding=True, - kwargs=self.extra_kwargs, - ) - except Exception: - raise - else: + # use repo_id to download the model + from huggingface_hub.utils import validate_repo_id + + validate_repo_id(self.repo_id) + self._model = Llama.from_pretrained( + repo_id=self.repo_id, + filename=self.model, + n_gpu_layers=self.n_gpu_layers, + seed=self.seed, + n_ctx=self.n_ctx, + n_batch=self.n_batch, + verbose=self.verbose, + embedding=True, + kwargs=self.extra_kwargs, + ) + elif self.model_path is not None: try: self._model = Llama( - model_path=self.model_path, + model_path=str(Path(self.model_path) / self.model), n_gpu_layers=self.n_gpu_layers, seed=self.seed, n_ctx=self.n_ctx, @@ -223,6 +224,8 @@ def load(self) -> None: ) except Exception: raise + else: + raise ValueError("Either 'model_path' or 'repo_id' must be provided") def unload(self) -> None: """Unloads the `gguf` model.""" @@ -232,7 +235,7 @@ def unload(self) -> None: @property def model_name(self) -> str: """Returns the name of the model.""" - return self.model_path + return self.model def encode(self, inputs: List[str]) -> List[List[Union[int, float]]]: """Generates embeddings for the provided inputs. diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 3d4d86c875..bf6db90a73 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -153,20 +153,16 @@ def local_llamacpp_model_path(tmp_path_factory): model_path = tmp_path / model_name if not model_path.exists(): - print(f"Downloading test model to {model_path}...") urlretrieve(model_url, model_path) - print("Download complete.") def cleanup(): if model_path.exists(): - print(f"Cleaning up downloaded model at {model_path}...") os.remove(model_path) - print("Cleanup complete.") # Register the cleanup function to be called at exit atexit.register(cleanup) - return str(model_path) + return str(tmp_path) def pytest_addoption(parser): diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 4b426ae3bc..528825933b 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -26,26 +26,27 @@ class TestLlamaCppEmbeddings: - model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" - repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" - hub_model = "all-MiniLM-L6-v2-Q5_K_M.gguf" - @pytest.fixture(autouse=True) def setup_embeddings(self, local_llamacpp_model_path, use_cpu): """ Fixture to set up embeddings for each test, considering CPU usage. """ + self.model_name = "all-MiniLM-L6-v2-Q2_K.gguf" + self.repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" n_gpu_layers = 0 if use_cpu else -1 self.embeddings = LlamaCppEmbeddings( - model_path=local_llamacpp_model_path, n_gpu_layers=n_gpu_layers + model=self.model_name, + model_path=local_llamacpp_model_path, + n_gpu_layers=n_gpu_layers, ) + self.embeddings.load() - def test_model_name(self, local_llamacpp_model_path) -> None: + def test_model_name(self) -> None: """ Test if the model name is correctly set. 
""" - assert self.embeddings.model_name == local_llamacpp_model_path + assert self.embeddings.model_name == self.model_name def test_encode(self) -> None: """ @@ -88,8 +89,8 @@ def test_load_model_from_repo(self, use_cpu): n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, + model=self.model_name, normalize_embeddings=True, - model_path=self.hub_model, n_gpu_layers=n_gpu_layers, ) inputs = [ @@ -117,8 +118,8 @@ def test_normalize_embeddings(self, use_cpu): n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, + model=self.model_name, normalize_embeddings=True, - model_path=self.hub_model, n_gpu_layers=n_gpu_layers, ) embeddings.load() From 48dce7b31b65723566f382e1a5bed05a9bc4c93d Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 18:39:41 +0530 Subject: [PATCH 18/27] Added description to attribute : model --- src/distilabel/embeddings/llamacpp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index b264473590..b1e816c513 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -144,7 +144,9 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): """ - model: str + model: str = Field( + description="The name of the model to use for embeddings.", + ) model_path: RuntimeParameter[str] = Field( default=None, From 0e1fb8e39b9e9321cb6989794b3cc72eace6809e Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 18:46:19 +0530 Subject: [PATCH 19/27] - Fixed examples --- src/distilabel/embeddings/llamacpp.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index b1e816c513..e7f5bf3a82 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -67,8 +67,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf model_path = "Downloads/" - model_name = "all-MiniLM-L6-v2-Q2_K.gguf" - embeddings = LlamaCppEmbeddings(model_name=model_name,model_path=str(Path.home() / model_path)) + model = "all-MiniLM-L6-v2-Q2_K.gguf" + embeddings = LlamaCppEmbeddings(model=model,model_path=str(Path.home() / model_path)) embeddings.load() @@ -86,8 +86,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): from distilabel.embeddings import LlamaCppEmbeddings repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" - model_name = "all-MiniLM-L6-v2-Q5_K_M.gguf" - embeddings = LlamaCppEmbeddings(model_name=model_name,repo_id=repo_id) + model = "all-MiniLM-L6-v2-Q5_K_M.gguf" + embeddings = LlamaCppEmbeddings(model=model,repo_id=repo_id) embeddings.load() @@ -108,8 +108,8 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): os.environ["HF_TOKEN"] = "hf_..." 
repo_id = "private_repo_id" - model_path = "model" - embeddings = LlamaCppEmbeddings(repo_id=repo_id,model_path=model_path) + model = "model" + embeddings = LlamaCppEmbeddings(repo_id=repo_id,model=model) embeddings.load() @@ -129,8 +129,9 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # command in the terminal, that will download the model to the `Downloads` folder: # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf - model_path = "Downloads/all-MiniLM-L6-v2-Q2_K.gguf" - embeddings = LlamaCppEmbeddings(model_path=str(Path.home() / model_path), n_gpu_layers=0) + model_path = "Downloads/" + model = "all-MiniLM-L6-v2-Q2_K.gguf" + embeddings = LlamaCppEmbeddings(model=model,model_path=str(Path.home() / model_path), n_gpu_layers=0) embeddings.load() From f72ef3094c4c469a39615e62993a43521d08ed67 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Fri, 4 Oct 2024 19:02:52 +0530 Subject: [PATCH 20/27] Updated examples --- src/distilabel/embeddings/llamacpp.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index e7f5bf3a82..62f4e86038 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -82,7 +82,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): Generate sentence embeddings using a HuggingFace Hub public model: ```python - from pathlib import Path from distilabel.embeddings import LlamaCppEmbeddings repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" @@ -101,7 +100,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): Generate sentence embeddings using a HuggingFace Hub private model: ```python - from pathlib import Path from distilabel.embeddings import LlamaCppEmbeddings # You need to set environment variable to download private model to the local machine @@ -109,7 +107,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): repo_id = "private_repo_id" model = "model" - embeddings = LlamaCppEmbeddings(repo_id=repo_id,model=model) + embeddings = LlamaCppEmbeddings(model=model,repo_id=repo_id) embeddings.load() @@ -119,6 +117,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # [4.4889533455716446e-05, 0.044016145169734955, ...], # ] ``` + Generate sentence embeddings with cpu: ```python From 8218242977e506d21d90ab311652eefb10ecbb15 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Mon, 14 Oct 2024 17:19:05 +0530 Subject: [PATCH 21/27] Update src/distilabel/embeddings/llamacpp.py try except block is not needed. 
Co-authored-by: David Berenstein --- src/distilabel/embeddings/llamacpp.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 62f4e86038..b9735f465e 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -213,19 +213,16 @@ def load(self) -> None: kwargs=self.extra_kwargs, ) elif self.model_path is not None: - try: - self._model = Llama( - model_path=str(Path(self.model_path) / self.model), - n_gpu_layers=self.n_gpu_layers, - seed=self.seed, - n_ctx=self.n_ctx, - n_batch=self.n_batch, - verbose=self.verbose, - embedding=True, - kwargs=self.extra_kwargs, - ) - except Exception: - raise + self._model = Llama( + model_path=str(Path(self.model_path) / self.model), + n_gpu_layers=self.n_gpu_layers, + seed=self.seed, + n_ctx=self.n_ctx, + n_batch=self.n_batch, + verbose=self.verbose, + embedding=True, + kwargs=self.extra_kwargs, + ) else: raise ValueError("Either 'model_path' or 'repo_id' must be provided") From db004825339191b318e2ce6e0e0a919bc8438ed0 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Mon, 14 Oct 2024 17:19:41 +0530 Subject: [PATCH 22/27] Update src/distilabel/embeddings/llamacpp.py Co-authored-by: David Berenstein --- src/distilabel/embeddings/llamacpp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index b9735f465e..794a1d2164 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -52,6 +52,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): - `normalize_embeddings`: whether to normalize the embeddings. Defaults to `False`. - `extra_kwargs`: additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library. Defaults to `{}`. + References: - [Offline inference embeddings](https://llama-cpp-python.readthedocs.io/en/stable/#embeddings) From 0fb7f15f2c8e4d99a68cc61557e84c74aaef6d9f Mon Sep 17 00:00:00 2001 From: bikash119 Date: Mon, 14 Oct 2024 17:20:20 +0530 Subject: [PATCH 23/27] Update src/distilabel/embeddings/llamacpp.py hidden attributes shouldn't be documented. Co-authored-by: David Berenstein --- src/distilabel/embeddings/llamacpp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 794a1d2164..41f41e86c3 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -43,8 +43,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): n_batch: Prompt processing maximum batch size extra_kwargs: additional dictionary of keyword arguments that will be passed to the `Llama` class of `llama_cpp` library. Defaults to `{}`. - _model: the `Llama` model instance. This attribute is meant to be used internally - and should not be accessed directly. It will be set in the `load` method. Runtime parameters: - `n_gpu_layers`: the number of layers to use for the GPU. Defaults to `-1`. 
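For context on the runtime parameters documented in the hunk above, a minimal usage sketch of the class as it stands at this point in the series (the `Downloads` directory is illustrative, and `n_gpu_layers=0` keeps execution on CPU):

```python
from pathlib import Path

from distilabel.embeddings import LlamaCppEmbeddings

# Illustrative location of a previously downloaded GGUF file.
model_dir = str(Path.home() / "Downloads")

embeddings = LlamaCppEmbeddings(
    model="all-MiniLM-L6-v2-Q2_K.gguf",  # file name inside model_dir
    model_path=model_dir,
    n_gpu_layers=0,             # runtime parameter: no layers offloaded to GPU
    normalize_embeddings=True,  # runtime parameter: unit-norm vectors, as the tests expect
)
embeddings.load()

vectors = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"])
print(len(vectors), len(vectors[0]))  # 2 vectors, 384 dimensions for this model

embeddings.unload()
```
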
From 155feb280f87992e5d9e13bd69aaf40b91366b6f Mon Sep 17 00:00:00 2001 From: bikash119 Date: Mon, 14 Oct 2024 18:01:54 +0530 Subject: [PATCH 24/27] Updated test to set disable_cuda_device_placement=True when testing for cpu --- .gitignore | 3 -- src/distilabel/embeddings/llamacpp.py | 1 - tests/unit/embeddings/test_llamacpp.py | 69 +++++++++++--------------- 3 files changed, 29 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 93707388c7..d8337200af 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,3 @@ venv.bak/ # Other *.log *.swp -.DS_Store -#models -tests/model diff --git a/src/distilabel/embeddings/llamacpp.py b/src/distilabel/embeddings/llamacpp.py index 41f41e86c3..11382ac91e 100644 --- a/src/distilabel/embeddings/llamacpp.py +++ b/src/distilabel/embeddings/llamacpp.py @@ -184,7 +184,6 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): def load(self) -> None: """Loads the `gguf` model using either the path or the Hugging Face Hub repository id.""" super().load() - self.disable_cuda_device_placement = True CudaDevicePlacementMixin.load(self) try: diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 528825933b..8f0a97d8b1 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -33,36 +33,47 @@ def setup_embeddings(self, local_llamacpp_model_path, use_cpu): """ self.model_name = "all-MiniLM-L6-v2-Q2_K.gguf" self.repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" + self.disable_cuda_device_placement = True n_gpu_layers = 0 if use_cpu else -1 self.embeddings = LlamaCppEmbeddings( model=self.model_name, model_path=local_llamacpp_model_path, n_gpu_layers=n_gpu_layers, + disable_cuda_device_placement=self.disable_cuda_device_placement, ) self.embeddings.load() + @pytest.fixture + def test_inputs(self): + """ + Fixture that provides a list of test input strings. + + Returns: + list: A list of strings to be used as test inputs for embeddings. + """ + return [ + "Hello, how are you?", + "What a nice day!", + "I hear that llamas are very popular now.", + ] + def test_model_name(self) -> None: """ Test if the model name is correctly set. """ assert self.embeddings.model_name == self.model_name - def test_encode(self) -> None: + def test_encode(self, test_inputs) -> None: """ Test if the model can generate embeddings. """ - inputs = [ - "Hello, how are you?", - "What a nice day!", - "I hear that llamas are very popular now.", - ] - results = self.embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=test_inputs) for result in results: assert len(result) == 384 - def test_load_model_from_local(self): + def test_load_model_from_local(self, test_inputs): """ Test if the model can be loaded from a local file and generate embeddings. @@ -70,19 +81,12 @@ def test_load_model_from_local(self): local_llamacpp_model_path (str): Fixture providing the local model path. """ - inputs = [ - "Hello, how are you?", - "What a nice day!", - "I hear that llamas are very popular now.", - ] - - # Test if the model is loaded by generating an embedding - results = self.embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=test_inputs) for result in results: assert len(result) == 384 - def test_load_model_from_repo(self, use_cpu): + def test_load_model_from_repo(self, use_cpu, test_inputs): """ Test if the model can be loaded from a Hugging Face repository. 
""" @@ -92,38 +96,29 @@ def test_load_model_from_repo(self, use_cpu): model=self.model_name, normalize_embeddings=True, n_gpu_layers=n_gpu_layers, + disable_cuda_device_placement=self.disable_cuda_device_placement, ) - inputs = [ - "Hello, how are you?", - "What a nice day!", - "I hear that llamas are very popular now.", - ] - embeddings.load() - # Test if the model is loaded by generating an embedding - results = embeddings.encode(inputs=inputs) + results = embeddings.encode(inputs=test_inputs) for result in results: assert len(result) == 384 - def test_normalize_embeddings(self, use_cpu): + def test_normalize_embeddings(self, use_cpu, test_inputs): """ Test if embeddings are normalized when normalize_embeddings is True. """ - inputs = [ - "Hello, how are you?", - "What a nice day!", - "I hear that llamas are very popular now.", - ] + n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, model=self.model_name, normalize_embeddings=True, n_gpu_layers=n_gpu_layers, + disable_cuda_device_placement=self.disable_cuda_device_placement, ) embeddings.load() - results = embeddings.encode(inputs=inputs) + results = embeddings.encode(inputs=test_inputs) for result in results: # Check if the embedding is normalized (L2 norm should be close to 1) @@ -132,18 +127,12 @@ def test_normalize_embeddings(self, use_cpu): norm, 1.0, atol=1e-6 ), f"Norm is {norm}, expected close to 1.0" - def test_normalize_embeddings_false(self): + def test_normalize_embeddings_false(self, test_inputs): """ Test if embeddings are not normalized when normalize_embeddings is False. """ - inputs = [ - "Hello, how are you?", - "What a nice day!", - "I hear that llamas are very popular now.", - ] - - results = self.embeddings.encode(inputs=inputs) + results = self.embeddings.encode(inputs=test_inputs) for result in results: # Check if the embedding is not normalized (L2 norm should not be close to 1) From 365940093162501e2a3c041c58d22d565aa64e3e Mon Sep 17 00:00:00 2001 From: bikash119 Date: Wed, 16 Oct 2024 18:16:01 +0530 Subject: [PATCH 25/27] testcase will by default load the model to cpu --- tests/unit/conftest.py | 45 +------------------------- tests/unit/embeddings/test_llamacpp.py | 22 ++++--------- 2 files changed, 8 insertions(+), 59 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index bf6db90a73..7e8fe74df5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -112,41 +112,15 @@ def local_llamacpp_model_path(tmp_path_factory): """ Session-scoped fixture that provides the local model path for LlamaCpp testing. - The model path can be set using the LLAMACPP_TEST_MODEL_PATH environment variable. - If not set, it downloads a small test model to a temporary directory. + Download a small test model to a temporary directory. The model is downloaded once per test session and cleaned up after all tests. - To use a custom model: - 1. Set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file. - 2. Ensure the model file exists at the specified path. - - Example: - export LLAMACPP_TEST_MODEL_PATH="/path/to/your/model.gguf" - Args: tmp_path_factory: Pytest fixture providing a temporary directory factory. Returns: str: The path to the local LlamaCpp model file. 
""" - print("\nLlamaCpp model path information:") - - # Check for environment variable first - env_path = os.environ.get("LLAMACPP_TEST_MODEL_PATH") - if env_path: - print(f"Using custom model path from LLAMACPP_TEST_MODEL_PATH: {env_path}") - if not os.path.exists(env_path): - raise FileNotFoundError( - f"Custom model file not found at {env_path}. Please ensure the file exists." - ) - return env_path - - print("LLAMACPP_TEST_MODEL_PATH not set. Using default test model.") - print( - "To use a custom model, set the LLAMACPP_TEST_MODEL_PATH environment variable to the path of your model file." - ) - - # If env var not set, use a small test model model_name = "all-MiniLM-L6-v2-Q2_K.gguf" model_url = f"https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/{model_name}" tmp_path = tmp_path_factory.getbasetemp() @@ -163,20 +137,3 @@ def cleanup(): atexit.register(cleanup) return str(tmp_path) - - -def pytest_addoption(parser): - """ - Add a command-line option to pytest for CPU-only testing. - """ - parser.addoption( - "--cpu-only", action="store", default=False, help="Run tests on CPU only" - ) - - -@pytest.fixture -def use_cpu(request): - """ - Fixture to determine whether to use CPU based on command-line option. - """ - return request.config.getoption("--cpu-only") diff --git a/tests/unit/embeddings/test_llamacpp.py b/tests/unit/embeddings/test_llamacpp.py index 8f0a97d8b1..d7d8b55fbb 100644 --- a/tests/unit/embeddings/test_llamacpp.py +++ b/tests/unit/embeddings/test_llamacpp.py @@ -18,27 +18,21 @@ from distilabel.embeddings import LlamaCppEmbeddings -""" -To test with CPU only, run the following command: -pytest tests/unit/embeddings/test_llamacpp.py --cpu-only - -""" - class TestLlamaCppEmbeddings: @pytest.fixture(autouse=True) - def setup_embeddings(self, local_llamacpp_model_path, use_cpu): + def setup_embeddings(self, local_llamacpp_model_path): """ Fixture to set up embeddings for each test, considering CPU usage. """ self.model_name = "all-MiniLM-L6-v2-Q2_K.gguf" self.repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" self.disable_cuda_device_placement = True - n_gpu_layers = 0 if use_cpu else -1 + self.n_gpu_layers = 0 self.embeddings = LlamaCppEmbeddings( model=self.model_name, model_path=local_llamacpp_model_path, - n_gpu_layers=n_gpu_layers, + n_gpu_layers=self.n_gpu_layers, disable_cuda_device_placement=self.disable_cuda_device_placement, ) @@ -86,16 +80,15 @@ def test_load_model_from_local(self, test_inputs): for result in results: assert len(result) == 384 - def test_load_model_from_repo(self, use_cpu, test_inputs): + def test_load_model_from_repo(self, test_inputs): """ Test if the model can be loaded from a Hugging Face repository. """ - n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, model=self.model_name, normalize_embeddings=True, - n_gpu_layers=n_gpu_layers, + n_gpu_layers=self.n_gpu_layers, disable_cuda_device_placement=self.disable_cuda_device_placement, ) embeddings.load() @@ -104,17 +97,16 @@ def test_load_model_from_repo(self, use_cpu, test_inputs): for result in results: assert len(result) == 384 - def test_normalize_embeddings(self, use_cpu, test_inputs): + def test_normalize_embeddings(self, test_inputs): """ Test if embeddings are normalized when normalize_embeddings is True. 
""" - n_gpu_layers = 0 if use_cpu else -1 embeddings = LlamaCppEmbeddings( repo_id=self.repo_id, model=self.model_name, normalize_embeddings=True, - n_gpu_layers=n_gpu_layers, + n_gpu_layers=self.n_gpu_layers, disable_cuda_device_placement=self.disable_cuda_device_placement, ) embeddings.load() From da92cc955b78e5d54102b9c6874b59c8b5a5ac8a Mon Sep 17 00:00:00 2001 From: bikash119 Date: Sat, 26 Oct 2024 07:55:00 +0530 Subject: [PATCH 26/27] example code updated --- src/distilabel/models/embeddings/llamacpp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/distilabel/models/embeddings/llamacpp.py b/src/distilabel/models/embeddings/llamacpp.py index aa6d945b56..a72f989d11 100644 --- a/src/distilabel/models/embeddings/llamacpp.py +++ b/src/distilabel/models/embeddings/llamacpp.py @@ -59,7 +59,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ```python from pathlib import Path - from distilabel.embeddings import LlamaCppEmbeddings + from distilabel.models.embeddings import LlamaCppEmbeddings # You can follow along this example downloading the following model running the following # command in the terminal, that will download the model to the `Downloads` folder: @@ -81,7 +81,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): Generate sentence embeddings using a HuggingFace Hub public model: ```python - from distilabel.embeddings import LlamaCppEmbeddings + from distilabel.models.embeddings import LlamaCppEmbeddings repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" model = "all-MiniLM-L6-v2-Q5_K_M.gguf" @@ -99,7 +99,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): Generate sentence embeddings using a HuggingFace Hub private model: ```python - from distilabel.embeddings import LlamaCppEmbeddings + from distilabel.models.embeddings import LlamaCppEmbeddings # You need to set environment variable to download private model to the local machine os.environ["HF_TOKEN"] = "hf_..." 
@@ -121,7 +121,7 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): ```python from pathlib import Path - from distilabel.embeddings import LlamaCppEmbeddings + from distilabel.models.embeddings import LlamaCppEmbeddings # You can follow along this example downloading the following model running the following # command in the terminal, that will download the model to the `Downloads` folder: From 09dd551491d580941cf8e26483c7230da9c25834 Mon Sep 17 00:00:00 2001 From: bikash119 Date: Sat, 26 Oct 2024 09:57:42 +0530 Subject: [PATCH 27/27] examples fixed --- src/distilabel/models/embeddings/llamacpp.py | 53 ++++++++------------ 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/src/distilabel/models/embeddings/llamacpp.py b/src/distilabel/models/embeddings/llamacpp.py index a72f989d11..6596bb45ea 100644 --- a/src/distilabel/models/embeddings/llamacpp.py +++ b/src/distilabel/models/embeddings/llamacpp.py @@ -63,54 +63,37 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # You can follow along this example downloading the following model running the following # command in the terminal, that will download the model to the `Downloads` folder: - # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf + # curl -L -o ~/Downloads/all-MiniLM-L6-v2-Q2_K.gguf https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-Q2_K.gguf model_path = "Downloads/" model = "all-MiniLM-L6-v2-Q2_K.gguf" - embeddings = LlamaCppEmbeddings(model=model,model_path=str(Path.home() / model_path)) + embeddings = LlamaCppEmbeddings( + model=model, + model_path=str(Path.home() / model_path), + ) embeddings.load() results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) - # [ - # [-0.05447685346007347, -0.01623094454407692, ...], - # [4.4889533455716446e-05, 0.044016145169734955, ...], - # ] + print(results) + embeddings.unload() ``` - Generate sentence embeddings using a HuggingFace Hub public model: + Generate sentence embeddings using a HuggingFace Hub model: ```python from distilabel.models.embeddings import LlamaCppEmbeddings - - repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" - model = "all-MiniLM-L6-v2-Q5_K_M.gguf" - embeddings = LlamaCppEmbeddings(model=model,repo_id=repo_id) - - embeddings.load() - - results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) - # [ - # [-0.05447685346007347, -0.01623094454407692, ...], - # [4.4889533455716446e-05, 0.044016145169734955, ...], - # ] - ``` - - Generate sentence embeddings using a HuggingFace Hub private model: - - ```python - from distilabel.models.embeddings import LlamaCppEmbeddings - # You need to set environment variable to download private model to the local machine - os.environ["HF_TOKEN"] = "hf_..." 
- repo_id = "private_repo_id" - model = "model" + repo_id = "second-state/All-MiniLM-L6-v2-Embedding-GGUF" + model = "all-MiniLM-L6-v2-Q2_K.gguf" embeddings = LlamaCppEmbeddings(model=model,repo_id=repo_id) embeddings.load() results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + print(results) + embeddings.unload() # [ # [-0.05447685346007347, -0.01623094454407692, ...], # [4.4889533455716446e-05, 0.044016145169734955, ...], @@ -125,15 +108,22 @@ class LlamaCppEmbeddings(Embeddings, CudaDevicePlacementMixin): # You can follow along this example downloading the following model running the following # command in the terminal, that will download the model to the `Downloads` folder: - # curl -L -o ~/Downloads/All-MiniLM-L6-v2-Embedding-GGUF https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/blob/main/all-MiniLM-L6-v2-Q2_K.gguf + # curl -L -o ~/Downloads/all-MiniLM-L6-v2-Q2_K.gguf https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-Q2_K.gguf model_path = "Downloads/" model = "all-MiniLM-L6-v2-Q2_K.gguf" - embeddings = LlamaCppEmbeddings(model=model,model_path=str(Path.home() / model_path), n_gpu_layers=0) + embeddings = LlamaCppEmbeddings( + model=model, + model_path=str(Path.home() / model_path), + n_gpu_layers=0, + disable_cuda_device_placement=True, + ) embeddings.load() results = embeddings.encode(inputs=["distilabel is awesome!", "and Argilla!"]) + print(results) + embeddings.unload() # [ # [-0.05447685346007347, -0.01623094454407692, ...], # [4.4889533455716446e-05, 0.044016145169734955, ...], @@ -227,6 +217,7 @@ def load(self) -> None: def unload(self) -> None: """Unloads the `gguf` model.""" CudaDevicePlacementMixin.unload(self) + self._model.close() super().unload() @property