Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support sentence-transformers #1

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
94ced94
Support sentence transformer
aleksandr-mokrov Aug 8, 2024
9610e0b
Fix error
aleksandr-mokrov Aug 11, 2024
12c751e
Separate class for SentenceTransformers and test
aleksandr-mokrov Aug 19, 2024
b4f4e82
Separate class interface for Sentence Transformers
aleksandr-mokrov Aug 19, 2024
ebf0889
Check if object has encode method
aleksandr-mokrov Aug 19, 2024
f3c03ec
Formatting and change output checking
aleksandr-mokrov Aug 19, 2024
2ee9be4
Tokenizer init moved to __init__, sentence-transformer pipeline in OVM…
aleksandr-mokrov Aug 23, 2024
ef0ea33
Remove model_max_length default value
aleksandr-mokrov Aug 28, 2024
f4edf85
Merge branch 'main' into sentence-transformers
IlyasMoutawwakil Aug 29, 2024
531db36
Update tests/openvino/test_modeling_sentence_transformers.py
aleksandr-mokrov Sep 3, 2024
876c166
Update tests/openvino/test_modeling.py
aleksandr-mokrov Sep 3, 2024
50a72ea
Move tokenizer initialization, other improvements
aleksandr-mokrov Sep 3, 2024
d7abc3d
Update optimum/intel/openvino/modeling_sentence_transformers.py
aleksandr-mokrov Sep 3, 2024
932dc75
Renaming OVModelForSentenceTransformer to OVSentenceTransformer
aleksandr-mokrov Sep 3, 2024
40194a0
Deprecate export parameters (#886)
nikita-savelyevv Sep 4, 2024
6b20949
Make style
aleksandr-mokrov Sep 4, 2024
3586b5b
Infer if the model needs to be exported (#825)
echarlaix Sep 5, 2024
abbdc7c
Move checking to init
aleksandr-mokrov Sep 5, 2024
89e2b07
Update tests/openvino/test_modeling.py
aleksandr-mokrov Sep 5, 2024
40d5e4d
Add dummy_openvino_and_sentence_transformers_objects.py
aleksandr-mokrov Sep 5, 2024
c971bff
Merge branch 'sentence-transformers' of https://github.com/aleksandr-…
aleksandr-mokrov Sep 5, 2024
a684307
refactoring
aleksandr-mokrov Sep 5, 2024
1d93dee
Update optimum/intel/utils/dummy_openvino_and_sentence_transformers_o…
aleksandr-mokrov Sep 5, 2024
ba90de1
Merge branch 'sentence-transformers' of https://github.com/aleksandr-…
aleksandr-mokrov Sep 5, 2024
db6e2e4
Add tests to check saving and loading model
aleksandr-mokrov Sep 5, 2024
4083e7d
Add tests to check saving and loading model
aleksandr-mokrov Sep 5, 2024
2077b6c
Support cls._library_name
aleksandr-mokrov Sep 5, 2024
cebade8
Fix test
aleksandr-mokrov Sep 5, 2024
c9b84f9
Refactoring
aleksandr-mokrov Sep 5, 2024
d395a65
Refactoring fix
aleksandr-mokrov Sep 5, 2024
2c52db5
Merge branch 'main' into ov-stc-trfs
echarlaix Sep 6, 2024
8760aa4
Update optimum/intel/openvino/modeling.py
echarlaix Sep 6, 2024
f6f0182
Fix test
aleksandr-mokrov Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/openvino/reference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ limitations under the License.
## Generic model classes

[[autodoc]] openvino.modeling_base.OVBaseModel
- _from_pretrained
- from_pretrained
- reshape

## Natural Language Processing
Expand Down
10 changes: 3 additions & 7 deletions docs/source/openvino/tutorials/diffusers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,14 @@ To further speed up inference, the model can be statically reshaped :

```python
# Define the shapes related to the inputs and desired outputs
batch_size = 1
num_images_per_prompt = 1
height = 512
width = 512

batch_size, num_images, height, width = 1, 1, 512, 512
# Statically reshape the model
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images)
# Compile the model before the first inference
pipeline.compile()

# Run inference
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images_per_prompt).images
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images).images
```

In case you want to change any parameters such as the output height or width, you'll need to statically reshape your model once again.
Expand Down
38 changes: 0 additions & 38 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,6 @@
import torch


_COMPRESSION_OPTIONS = {
"int8": {"bits": 8},
"int4_sym_g128": {"bits": 4, "sym": True, "group_size": 128},
"int4_asym_g128": {"bits": 4, "sym": False, "group_size": 128},
"int4_sym_g64": {"bits": 4, "sym": True, "group_size": 64},
"int4_asym_g64": {"bits": 4, "sym": False, "group_size": 64},
}


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -108,8 +99,6 @@ def main_export(
model_kwargs: Optional[Dict[str, Any]] = None,
custom_export_configs: Optional[Dict[str, "OnnxConfig"]] = None,
fn_get_submodels: Optional[Callable] = None,
compression_option: Optional[str] = None,
compression_ratio: Optional[float] = None,
ov_config: "OVConfig" = None,
stateful: bool = True,
convert_tokenizer: bool = False,
Expand Down Expand Up @@ -171,11 +160,6 @@ def main_export(
fn_get_submodels (`Optional[Callable]`, defaults to `None`):
Experimental usage: Override the default submodels that are used at the export. This is
especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `int4_sym_g128` - INT4 symmetric weights w/ group size 128, `int4_asym_g128` - as previous but asymmetric w/ zero-point,
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, "int4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`bool`, defaults to `True`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs. Applicable only for decoder models.
**kwargs_shapes (`Dict`):
Expand All @@ -198,28 +182,6 @@ def main_export(
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

if compression_option is not None:
logger.warning(
"The `compression_option` argument is deprecated and will be removed in optimum-intel v1.17.0. "
"Please, pass an `ov_config` argument instead `OVConfig(..., quantization_config=quantization_config)`."
)

if compression_ratio is not None:
logger.warning(
"The `compression_ratio` argument is deprecated and will be removed in optimum-intel v1.17.0. "
"Please, pass an `ov_config` argument instead `OVConfig(quantization_config={ratio=compression_ratio})`."
)

if ov_config is None and compression_option is not None:
from ...intel.openvino.configuration import OVConfig

if compression_option == "fp16":
ov_config = OVConfig(dtype="fp16")
elif compression_option != "fp32":
q_config = _COMPRESSION_OPTIONS[compression_option] if compression_option in _COMPRESSION_OPTIONS else {}
q_config["ratio"] = compression_ratio or 1.0
ov_config = OVConfig(quantization_config=q_config)

original_task = task
task = infer_task(
task, model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
Expand Down
28 changes: 28 additions & 0 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
is_neural_compressor_available,
is_nncf_available,
is_openvino_available,
is_sentence_transformers_available,
)
from .version import __version__

Expand Down Expand Up @@ -179,6 +180,21 @@
_import_structure["neural_compressor"].append("INCStableDiffusionPipeline")


try:
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = [
"OVSentenceTransformer",
]
else:
_import_structure["openvino"].extend(
[
"OVSentenceTransformer",
]
)


if TYPE_CHECKING:
try:
if not is_ipex_available():
Expand Down Expand Up @@ -302,6 +318,18 @@
else:
from .neural_compressor import INCStableDiffusionPipeline

try:
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils.dummy_openvino_and_sentence_transformers_objects import (
OVSentenceTransformer,
)
else:
from .openvino import (
OVSentenceTransformer,
)

else:
import sys

Expand Down
11 changes: 10 additions & 1 deletion optimum/intel/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
import logging
import warnings

from ..utils.import_utils import is_accelerate_available, is_diffusers_available, is_nncf_available
from ..utils.import_utils import (
is_accelerate_available,
is_diffusers_available,
is_nncf_available,
is_sentence_transformers_available,
)
from .utils import (
OV_DECODER_NAME,
OV_DECODER_WITH_PAST_NAME,
Expand Down Expand Up @@ -77,3 +82,7 @@
OVStableDiffusionXLImg2ImgPipeline,
OVStableDiffusionXLPipeline,
)


if is_sentence_transformers_available():
from .modeling_sentence_transformers import OVSentenceTransformer
28 changes: 7 additions & 21 deletions optimum/intel/openvino/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Union
Expand Down Expand Up @@ -370,6 +369,13 @@ class OVModelForFeatureExtraction(OVModel):
auto_model_class = AutoModel

def __init__(self, model=None, config=None, **kwargs):
if {"token_embeddings", "sentence_embedding"}.issubset(
{name for output in model.outputs for name in output.names}
): # Sentence Transformers outputs
raise ValueError(
"This model is a Sentence Transformers model. Please use `OVSentenceTransformer` to load this model."
)

super().__init__(model, config, **kwargs)

@add_start_docstrings_to_model_forward(
Expand Down Expand Up @@ -417,7 +423,6 @@ def _from_transformers(
cls,
model_id: str,
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
Expand All @@ -430,15 +435,6 @@ def _from_transformers(
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)
# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
Expand Down Expand Up @@ -591,7 +587,6 @@ def from_pretrained(
model_id: Union[str, Path],
export: bool = False,
config: Optional["PretrainedConfig"] = None,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
Expand All @@ -602,15 +597,6 @@ def from_pretrained(
trust_remote_code: bool = False,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

# Fix the mismatch between timm_config and huggingface_config
local_timm_model = _is_timm_ov_dir(model_id)
if local_timm_model or (not os.path.isdir(model_id) and model_info(model_id).library_name == "timm"):
Expand Down
Loading