Add new models nvidia, gte, linq #1436

Open · wants to merge 1 commit into main

2 changes: 1 addition & 1 deletion mteb/models/arctic_models.py
@@ -5,7 +5,7 @@
from mteb.model_meta import ModelMeta, sentence_transformers_loader

arctic_m_v1_5 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="Snowflake/snowflake-arctic-embed-m-v1.5",
revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47",
6 changes: 3 additions & 3 deletions mteb/models/bge_models.py
@@ -7,7 +7,7 @@
model_prompts = {"query": "Represent this sentence for searching relevant passages: "}

bge_small_en_v1_5 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-small-en-v1.5",
revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
@@ -30,7 +30,7 @@
)

bge_base_en_v1_5 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-base-en-v1.5",
revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
@@ -53,7 +53,7 @@
)

bge_large_en_v1_5 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="BAAI/bge-large-en-v1.5",
revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",
4 changes: 2 additions & 2 deletions mteb/models/cohere_models.py
@@ -74,7 +74,7 @@ def encode(
}

cohere_mult_3 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
CohereTextEmbeddingModel,
model_name="embed-multilingual-v3.0",
model_prompts=model_prompts,
@@ -95,7 +95,7 @@
)

cohere_eng_3 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
CohereTextEmbeddingModel,
model_name="embed-multilingual-v3.0",
model_prompts=model_prompts,
11 changes: 5 additions & 6 deletions mteb/models/e5_instruct.py
@@ -12,15 +12,14 @@
MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"]


- def e5_instruction(instruction: str) -> str:
-     return f"Instruct: {instruction}\nQuery: "
+ E5_INSTRUCTION = "Instruct: {instruction}\nQuery: "


e5_instruct = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="intfloat/multilingual-e5-large-instruct",
- instruction_template=e5_instruction,
+ instruction_template=E5_INSTRUCTION,
attn="cccc",
pooling_method="mean",
mode="embedding",
@@ -44,10 +44,10 @@ def e5_instruction(instruction: str) -> str:
)

e5_mistral = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="intfloat/e5-mistral-7b-instruct",
- instruction_template=e5_instruction,
+ instruction_template=E5_INSTRUCTION,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
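Note: this change swaps the e5_instruction helper for the plain E5_INSTRUCTION format string, while gritlm_models.py and the new gte_models.py and linq_models.py keep passing callables, which suggests instruct_wrapper accepts either form for instruction_template. A minimal sketch of how such a dual-form template could be rendered, assuming that behaviour; render_instruction is a hypothetical helper, not part of mteb:

    from typing import Callable, Union

    E5_INSTRUCTION = "Instruct: {instruction}\nQuery: "

    def render_instruction(template: Union[str, Callable[[str], str]], instruction: str) -> str:
        # Callables are invoked directly; plain strings are treated as str.format templates.
        if callable(template):
            return template(instruction)
        return template.format(instruction=instruction)

    print(render_instruction(E5_INSTRUCTION, "Given a web search query, retrieve relevant passages"))
    # Instruct: Given a web search query, retrieve relevant passages
    # Query:
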
14 changes: 7 additions & 7 deletions mteb/models/e5_models.py
@@ -114,7 +114,7 @@
}

e5_mult_small = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-small",
revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
@@ -137,7 +137,7 @@
)

e5_mult_base = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-base",
model_prompts=model_prompts,
@@ -159,7 +159,7 @@
)

e5_mult_large = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/multilingual-e5-large",
revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
@@ -182,7 +182,7 @@
)

e5_eng_small_v2 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-small-v2",
model_prompts=model_prompts,
@@ -204,7 +204,7 @@
)

e5_eng_small = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-small",
revision="e272f3049e853b47cb5ca3952268c6662abda68f",
@@ -227,7 +227,7 @@
)

e5_eng_base_v2 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-base-v2",
revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
@@ -250,7 +250,7 @@
)

e5_eng_large_v2 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="intfloat/e5-large-v2",
revision="b322e09026e4ea05f42beadf4d661fb4e101d311",
2 changes: 1 addition & 1 deletion mteb/models/google_models.py
@@ -78,7 +78,7 @@ def encode(

name = "text-embedding-004"
google_emb_004 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
GoogleTextEmbeddingModel,
model_name=name,
model_prompts={
4 changes: 2 additions & 2 deletions mteb/models/gritlm_models.py
@@ -17,7 +17,7 @@ def gritlm_instruction(instruction: str = "") -> str:


gritlm7b = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="GritLM/GritLM-7B",
instruction_template=gritlm_instruction,
@@ -40,7 +40,7 @@ def gritlm_instruction(instruction: str = "") -> str:
use_instructions=True,
)
gritlm8x7b = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="GritLM/GritLM-8x7B",
instruction_template=gritlm_instruction,
68 changes: 65 additions & 3 deletions mteb/models/gte_models.py
@@ -2,15 +2,21 @@

from functools import partial

+ import torch

from mteb.model_meta import ModelMeta
+ from mteb.models.instruct_wrapper import instruct_wrapper


+ def instruction_template(instruction: str) -> str:
+     return f"Instruct: {instruction}\nQuery: " if instruction else ""

- from .instruct_wrapper import instruct_wrapper

gte_Qwen2_7B_instruct = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="Alibaba-NLP/gte-Qwen2-7B-instruct",
instruction_template="Instruct: {instruction}\nQuery: ",
instruction_template=instruction_template,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
@@ -33,3 +39,59 @@
framework=["Sentence Transformers", "PyTorch"],
use_instructions=True,
)


gte_Qwen1_5_7B_instruct = ModelMeta(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
instruction_template=instruction_template,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
torch_dtype="auto",
normalized=True,
),
name="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
languages=["eng_Latn"],
open_weights=True,
revision="07d27e5226328010336563bc1b564a5e3436a298",
release_date="2024-04-20", # initial commit of hf model.
n_parameters=7_720_000_000,
memory_usage=None,
embed_dim=4096,
license="apache-2.0",
max_tokens=32768,
reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct",
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch"],
use_instructions=True,
)


gte_Qwen2_1_5B_instruct = ModelMeta(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
instruction_template=instruction_template,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
torch_dtype="auto",
normalized=True,
),
name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
languages=["eng_Latn"],
open_weights=True,
revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
release_date="2024-07-29", # initial commit of hf model.
n_parameters=1_780_000_000,
memory_usage=None,
embed_dim=8960,
license="apache-2.0",
max_tokens=131072,
reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch"],
use_instructions=True,
)
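
For reference, a short sketch of how one of the newly registered GTE models could be loaded and evaluated through mteb once this is merged; the task name is only an illustrative choice and the evaluation calls are assumed from mteb's usual API:

    import mteb

    # Resolve the ModelMeta registered above and instantiate its loader.
    model = mteb.get_model("Alibaba-NLP/gte-Qwen2-1.5B-instruct")

    # Run a small benchmark as a smoke test (task choice is arbitrary).
    tasks = mteb.get_tasks(tasks=["Banking77Classification"])
    results = mteb.MTEB(tasks=tasks).run(model, output_folder="results")
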
2 changes: 1 addition & 1 deletion mteb/models/jina_models.py
@@ -191,7 +191,7 @@ def encode(


jina_embeddings_v3 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
JinaWrapper,
model="jinaai/jina-embeddings-v3",
revision="215a6e121fa0183376388ac6b1ae230326bfeaed",
40 changes: 40 additions & 0 deletions mteb/models/linq_models.py
@@ -0,0 +1,40 @@
from __future__ import annotations

from functools import partial

import torch

from mteb.model_meta import ModelMeta
from mteb.models.instruct_wrapper import instruct_wrapper


def instruction_template(instruction: str) -> str:
return f"Instruct: {instruction}\nQuery: " if instruction else ""


Linq_Embed_Mistral = ModelMeta(
loader=partial( # type: ignore
instruct_wrapper,
model_name_or_path="Linq-AI-Research/Linq-Embed-Mistral",
instruction_template=instruction_template,
attn="cccc",
pooling_method="lasttoken",
mode="embedding",
torch_dtype=torch.bfloat16,
normalized=True,
),
name="Linq-AI-Research/Linq-Embed-Mistral",
languages=["eng_Latn"],
open_weights=True,
revision="0c1a0b0589177079acc552433cad51d7c9132379",
release_date="2024-05-29", # initial commit of hf model.
n_parameters=7_110_000_000,
memory_usage=None,
embed_dim=4096,
license="cc-by-nc-4.0",
max_tokens=32768,
reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral",
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch"],
use_instructions=True,
)
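
The instruction_template helper above (also duplicated in gte_models.py) only adds the "Instruct: ... Query: " prefix when an instruction is actually given, so an empty instruction, e.g. for plain documents, produces no prefix. A quick illustration using the function exactly as defined in this file:

    def instruction_template(instruction: str) -> str:
        return f"Instruct: {instruction}\nQuery: " if instruction else ""

    print(instruction_template("Retrieve semantically similar text"))
    # Instruct: Retrieve semantically similar text
    # Query:
    print(repr(instruction_template("")))  # '' -> no prompt prefix without an instruction
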
2 changes: 1 addition & 1 deletion mteb/models/mxbai_models.py
@@ -5,7 +5,7 @@
from mteb.model_meta import ModelMeta, sentence_transformers_loader

mxbai_embed_large_v1 = ModelMeta(
- loader=partial(
+ loader=partial( # type: ignore
sentence_transformers_loader,
model_name="mixedbread-ai/mxbai-embed-large-v1",
revision="990580e27d329c7408b3741ecff85876e128e203",