Skip to content

Commit

Permalink
fix: external download (#80)
Browse files: browse the repository at this point in the history.
* fix download

* add more

* add renamed tasks

* add renamed tasks
  • Loading branch information
Samoed authored Dec 22, 2024
1 parent 6fbbd2e commit 2b0b404
Show file tree
Hide file tree
Showing 2,718 changed files with 134,368 additions and 57,481 deletions.
18 changes: 15 additions & 3 deletions load_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,19 @@ def get_model_dir(model_id: str) -> Path:
return external_result_dir


# Old task name -> replacement task name. simplify_dataset_name() consults this
# table after it has extracted the bare task name from a dataset display name;
# names that are not listed here are passed through unchanged.
renamed_tasks = {
    "NorwegianParliament": "NorwegianParliamentClassification",
    "CMedQAv2": "CMedQAv2-reranking",
    "CMedQAv1": "CMedQAv1-reranking",
    "8TagsClustering": "EightTagsClustering",
    "PPC": "PpcPC",
    "PawsX": "PawsXParaphraseIdentification",
}


def simplify_dataset_name(name: str) -> str:
    """Reduce a dataset display name to a task name.

    Every occurrence of the ``"MTEB "`` prefix text is removed, the first
    whitespace-separated token of what remains is taken as the task name
    (so a trailing qualifier such as ``"(en)"`` is dropped), and the result
    is translated through ``renamed_tasks`` when it has an entry there.

    Args:
        name: Dataset display name, e.g. ``"MTEB PawsX (fr)"``.

    Returns:
        The mapped task name if the extracted token is a key of
        ``renamed_tasks``, otherwise the extracted token itself.

    Raises:
        IndexError: If ``name`` contains no non-whitespace text once the
            prefix has been removed.
    """
    # The earlier implementation returned the bare token right here, which
    # made the rename lookup below unreachable; the token must go through
    # ``renamed_tasks`` before being returned.
    task_name = name.replace("MTEB ", "").split()[0]
    return renamed_tasks.get(task_name, task_name)


def get_model_parameters_memory(model_info: ModelInfo) -> tuple[int| None, float|None]:
Expand Down Expand Up @@ -140,7 +151,8 @@ def parse_readme(model_info: ModelInfo) -> dict[str, dict[str, Any]] | None:
model_results = {}
for result in results:
dataset = result["dataset"]
dataset_type = dataset["type"] # type is repo of the dataset
dataset_type = simplify_dataset_name(dataset["name"])

if dataset_type not in model_results:
output_dict = {
"dataset_revision": dataset.get("revision", ""),
Expand Down Expand Up @@ -195,7 +207,7 @@ def parse_readme(model_info: ModelInfo) -> dict[str, dict[str, Any]] | None:

def get_mteb_data() -> None:
models = sorted(list(API.list_models(filter="mteb", full=True)), key=lambda x: x.id)
# models = [model for model in models if model.id == "intfloat/multilingual-e5-large"]
# models = [model for model in models if model.id == "ai-forever/ru-en-RoSBERTa"]
for i, model_info in enumerate(models, start=1):
logger.info(f"[{i}/{len(models)}] Processing {model_info.id}")
model_path = get_model_dir(model_info.id)
Expand Down
Loading

0 comments on commit 2b0b404

Please sign in to comment.