Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support sentence-transformers #1

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
94ced94
Support sentence transformer
aleksandr-mokrov Aug 8, 2024
9610e0b
Fix error
aleksandr-mokrov Aug 11, 2024
12c751e
Separate class for SentenceTransformers and test
aleksandr-mokrov Aug 19, 2024
b4f4e82
Separate class interface for Sentence Transformers
aleksandr-mokrov Aug 19, 2024
ebf0889
Check if object has encode method
aleksandr-mokrov Aug 19, 2024
f3c03ec
Formatting and change output checking
aleksandr-mokrov Aug 19, 2024
2ee9be4
Tokenizer init moved to __init__, sentence-transformer pipeline in OVM…
aleksandr-mokrov Aug 23, 2024
ef0ea33
Remove model_max_length default value
aleksandr-mokrov Aug 28, 2024
f4edf85
Merge branch 'main' into sentence-transformers
IlyasMoutawwakil Aug 29, 2024
531db36
Update tests/openvino/test_modeling_sentence_transformers.py
aleksandr-mokrov Sep 3, 2024
876c166
Update tests/openvino/test_modeling.py
aleksandr-mokrov Sep 3, 2024
50a72ea
Move tokenizer initialization, other improvements
aleksandr-mokrov Sep 3, 2024
d7abc3d
Update optimum/intel/openvino/modeling_sentence_transformers.py
aleksandr-mokrov Sep 3, 2024
932dc75
Renaming OVModelForSentenceTransformer to OVSentenceTransformer
aleksandr-mokrov Sep 3, 2024
40194a0
Deprecate export parameters (#886)
nikita-savelyevv Sep 4, 2024
6b20949
Make style
aleksandr-mokrov Sep 4, 2024
3586b5b
Infer if the model needs to be exported (#825)
echarlaix Sep 5, 2024
abbdc7c
Move checking to init
aleksandr-mokrov Sep 5, 2024
89e2b07
Update tests/openvino/test_modeling.py
aleksandr-mokrov Sep 5, 2024
40d5e4d
Add dummy_openvino_and_sentence_transformers_objects.py
aleksandr-mokrov Sep 5, 2024
c971bff
Merge branch 'sentence-transformers' of https://github.com/aleksandr-…
aleksandr-mokrov Sep 5, 2024
a684307
refactoring
aleksandr-mokrov Sep 5, 2024
1d93dee
Update optimum/intel/utils/dummy_openvino_and_sentence_transformers_o…
aleksandr-mokrov Sep 5, 2024
ba90de1
Merge branch 'sentence-transformers' of https://github.com/aleksandr-…
aleksandr-mokrov Sep 5, 2024
db6e2e4
Add tests to check saving and loading model
aleksandr-mokrov Sep 5, 2024
4083e7d
Add tests to check saving and loading model
aleksandr-mokrov Sep 5, 2024
2077b6c
Support cls._library_name
aleksandr-mokrov Sep 5, 2024
cebade8
Fix test
aleksandr-mokrov Sep 5, 2024
c9b84f9
Refactoring
aleksandr-mokrov Sep 5, 2024
d395a65
Refactoring fix
aleksandr-mokrov Sep 5, 2024
2c52db5
Merge branch 'main' into ov-stc-trfs
echarlaix Sep 6, 2024
8760aa4
Update optimum/intel/openvino/modeling.py
echarlaix Sep 6, 2024
f6f0182
Fix test
aleksandr-mokrov Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/openvino/reference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ limitations under the License.
## Generic model classes

[[autodoc]] openvino.modeling_base.OVBaseModel
- _from_pretrained
- from_pretrained
- reshape

## Natural Language Processing
Expand Down
10 changes: 3 additions & 7 deletions docs/source/openvino/tutorials/diffusers.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,14 @@ To further speed up inference, the model can be statically reshaped :

```python
# Define the shapes related to the inputs and desired outputs
batch_size = 1
num_images_per_prompt = 1
height = 512
width = 512

batch_size, num_images, height, width = 1, 1, 512, 512
# Statically reshape the model
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images)
# Compile the model before the first inference
pipeline.compile()

# Run inference
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images_per_prompt).images
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images).images
```

In case you want to change any parameters such as the output height or width, you'll need to statically reshape your model once again.
Expand Down
38 changes: 0 additions & 38 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,6 @@
import torch


_COMPRESSION_OPTIONS = {
"int8": {"bits": 8},
"int4_sym_g128": {"bits": 4, "sym": True, "group_size": 128},
"int4_asym_g128": {"bits": 4, "sym": False, "group_size": 128},
"int4_sym_g64": {"bits": 4, "sym": True, "group_size": 64},
"int4_asym_g64": {"bits": 4, "sym": False, "group_size": 64},
}


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -108,8 +99,6 @@ def main_export(
model_kwargs: Optional[Dict[str, Any]] = None,
custom_export_configs: Optional[Dict[str, "OnnxConfig"]] = None,
fn_get_submodels: Optional[Callable] = None,
compression_option: Optional[str] = None,
compression_ratio: Optional[float] = None,
ov_config: "OVConfig" = None,
stateful: bool = True,
convert_tokenizer: bool = False,
Expand Down Expand Up @@ -171,11 +160,6 @@ def main_export(
fn_get_submodels (`Optional[Callable]`, defaults to `None`):
Experimental usage: Override the default submodels that are used at the export. This is
especially useful when exporting a custom architecture that needs to split the ONNX (e.g. encoder-decoder). If unspecified with custom models, optimum will try to use the default submodels used for the given task, with no guarantee of success.
compression_option (`Optional[str]`, defaults to `None`):
The weight compression option, e.g. `f16` stands for float16 weights, `i8` - INT8 weights, `int4_sym_g128` - INT4 symmetric weights w/ group size 128, `int4_asym_g128` - as previous but asymmetric w/ zero-point,
`int4_sym_g64` - INT4 symmetric weights w/ group size 64, "int4_asym_g64" - as previous but asymmetric w/ zero-point, `f32` - means no compression.
compression_ratio (`Optional[float]`, defaults to `None`):
Compression ratio between primary and backup precision (only relevant to INT4).
stateful (`bool`, defaults to `True`):
Produce stateful model where all kv-cache inputs and outputs are hidden in the model and are not exposed as model inputs and outputs. Applicable only for decoder models.
**kwargs_shapes (`Dict`):
Expand All @@ -198,28 +182,6 @@ def main_export(
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

if compression_option is not None:
logger.warning(
"The `compression_option` argument is deprecated and will be removed in optimum-intel v1.17.0. "
"Please, pass an `ov_config` argument instead `OVConfig(..., quantization_config=quantization_config)`."
)

if compression_ratio is not None:
logger.warning(
"The `compression_ratio` argument is deprecated and will be removed in optimum-intel v1.17.0. "
"Please, pass an `ov_config` argument instead `OVConfig(quantization_config={ratio=compression_ratio})`."
)

if ov_config is None and compression_option is not None:
from ...intel.openvino.configuration import OVConfig

if compression_option == "fp16":
ov_config = OVConfig(dtype="fp16")
elif compression_option != "fp32":
q_config = _COMPRESSION_OPTIONS[compression_option] if compression_option in _COMPRESSION_OPTIONS else {}
q_config["ratio"] = compression_ratio or 1.0
ov_config = OVConfig(quantization_config=q_config)

original_task = task
task = infer_task(
task, model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
Expand Down
28 changes: 28 additions & 0 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
is_neural_compressor_available,
is_nncf_available,
is_openvino_available,
is_sentence_transformers_available,
)
from .version import __version__

Expand Down Expand Up @@ -179,6 +180,21 @@
_import_structure["neural_compressor"].append("INCStableDiffusionPipeline")


try:
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = [
"OVSentenceTransformer",
]
else:
_import_structure["openvino"].extend(
[
"OVSentenceTransformer",
]
)


if TYPE_CHECKING:
try:
if not is_ipex_available():
Expand Down Expand Up @@ -302,6 +318,18 @@
else:
from .neural_compressor import INCStableDiffusionPipeline

try:
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils.dummy_openvino_and_sentence_transformers_objects import (
OVSentenceTransformer,
)
else:
from .openvino import (
OVSentenceTransformer,
)

else:
import sys

Expand Down
11 changes: 10 additions & 1 deletion optimum/intel/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
import logging
import warnings

from ..utils.import_utils import is_accelerate_available, is_diffusers_available, is_nncf_available
from ..utils.import_utils import (
is_accelerate_available,
is_diffusers_available,
is_nncf_available,
is_sentence_transformers_available,
)
from .utils import (
OV_DECODER_NAME,
OV_DECODER_WITH_PAST_NAME,
Expand Down Expand Up @@ -77,3 +82,7 @@
OVStableDiffusionXLImg2ImgPipeline,
OVStableDiffusionXLPipeline,
)


if is_sentence_transformers_available():
from .modeling_sentence_transformers import OVSentenceTransformer
28 changes: 7 additions & 21 deletions optimum/intel/openvino/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Union
Expand Down Expand Up @@ -370,6 +369,13 @@ class OVModelForFeatureExtraction(OVModel):
auto_model_class = AutoModel

def __init__(self, model=None, config=None, **kwargs):
if {"token_embeddings", "sentence_embedding"}.issubset(
{name for output in model.outputs for name in output.names}
): # Sentence Transformers outputs
raise ValueError(
"This model is a Sentence Transformers model. Please use `OVSentenceTransformer` to load this model."
)

super().__init__(model, config, **kwargs)

@add_start_docstrings_to_model_forward(
Expand Down Expand Up @@ -417,7 +423,6 @@ def _from_transformers(
cls,
model_id: str,
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
Expand All @@ -430,15 +435,6 @@ def _from_transformers(
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)
# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
Expand Down Expand Up @@ -591,7 +587,6 @@ def from_pretrained(
model_id: Union[str, Path],
export: bool = False,
config: Optional["PretrainedConfig"] = None,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
Expand All @@ -602,15 +597,6 @@ def from_pretrained(
trust_remote_code: bool = False,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

# Fix the mismatch between timm_config and huggingface_config
local_timm_model = _is_timm_ov_dir(model_id)
if local_timm_model or (not os.path.isdir(model_id) and model_info(model_id).library_name == "timm"):
Expand Down
Loading