[mieb] mieb scripts (siglip rerun & linear probing ablation & params count) (#1429)

* mieb scripts

* lint
gowitheflow-1998 authored Nov 10, 2024
1 parent f0dd6f6 commit 66176a0
Showing 3 changed files with 129 additions and 0 deletions.
28 changes: 28 additions & 0 deletions scripts/run_mieb_get_params.py
@@ -0,0 +1,28 @@
from __future__ import annotations

import pandas as pd
import torch
from tqdm import tqdm

import mteb

params = []

# add all model names
model_names = [
    # "google/siglip-base-patch16-512",
    "google/siglip-so400m-patch14-384",
    # ...
]

for model_name in tqdm(model_names):
    model = mteb.get_model(model_name)

    # count all parameters of the underlying model and convert to millions
    total_params = sum(p.numel() for p in model.model.parameters())
    total_params = total_params / 1e6
    params.append([model_name, total_params])

    # free GPU memory before loading the next model
    del model
    torch.cuda.empty_cache()

param_frame = pd.DataFrame(params, columns=["model name", "# params"])
param_frame.to_csv("params.csv", index=False)
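For reference, a minimal sketch of how the resulting CSV could be inspected afterwards; it assumes the params.csv written by the script above (with the "# params" column in millions) and is illustrative rather than part of the commit:

import pandas as pd

df = pd.read_csv("params.csv")
# largest models first; "# params" is in millions
print(df.sort_values("# params", ascending=False).to_string(index=False))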
71 changes: 71 additions & 0 deletions scripts/run_mieb_kshot_ablation.py
@@ -0,0 +1,71 @@
from __future__ import annotations

import mteb

for model_name in [
    # key models for this ablation (different types of models)
    "openai/clip-vit-base-patch32",
    "openai/clip-vit-base-patch16",
    "openai/clip-vit-large-patch14",
    "royokong/e5-v",
    "facebook/dinov2-small",
    "facebook/dinov2-base",
    "facebook/dinov2-large",
    "facebook/dinov2-giant",
    # additional models for more insights
    "BAAI/bge-visualized-base",
    "BAAI/bge-visualized-m3",
    "google/siglip-so400m-patch14-384",
    "google/siglip-base-patch16-256-multilingual",
    "google/siglip-base-patch16-256",
    "google/siglip-base-patch16-512",
    "google/siglip-base-patch16-384",
    "google/siglip-base-patch16-224",
    "google/siglip-large-patch16-256",
    "google/siglip-large-patch16-384",
    "nyu-visionx/moco-v3-vit-b",
    "nyu-visionx/moco-v3-vit-l",
    "laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90K",
    "laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90K",
    "laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K",
    "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
    "laion/CLIP-ViT-g-14-laion2B-s34B-b88K",
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
    "laion/CLIP-ViT-L-14-laion2B-s32B-b82K",
    "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
    "EVA02-CLIP-B-16",
    "EVA02-CLIP-L-14",
    "EVA02-CLIP-bigE-14",
    "EVA02-CLIP-bigE-14-plus",
    "TIGER-Lab/VLM2Vec-LoRA",
    "TIGER-Lab/VLM2Vec-Full",
    # run if there is enough compute:
    # "Salesforce/blip-itm-base-coco",
    # "Salesforce/blip-itm-large-coco",
    # "Salesforce/blip-itm-base-flickr",
    # "Salesforce/blip-itm-large-flickr",
    # "kakaobrain/align-base",
    # "jinaai/jina-clip-v1",
    # "nomic-ai/nomic-embed-vision-v1.5",
    # "Salesforce/blip2-opt-2.7b",
    # "Salesforce/blip2-opt-6.7b-coco",
    # "embed-english-v3.0-v",  # not feasible to run due to the 40 images/min rate limit
]:
    # samples_per_label defaults to 16, so the 16-shot setting is already covered
    for k_shot in [8, 32, 64, 128, 256]:
        model = mteb.get_model(model_name)
        tasks = mteb.get_tasks(
            task_types=[
                "ImageClassification",
            ]
        )
        # override the number of labelled examples per class used for linear probing
        for task in tasks:
            task.samples_per_label = k_shot
        evaluation = mteb.MTEB(tasks=tasks)
        results = evaluation.run(
            model, output_folder=f"results-mieb-final/linear_probe_{k_shot}"
        )
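A rough sketch of how the per-k results written by the loop above might be collected afterwards; it assumes mteb's usual on-disk layout of one JSON result file per task (with "task_name" and "scores" keys) under each output folder, which is an assumption rather than something stated in this commit:

import glob
import json

for k_shot in [8, 32, 64, 128, 256]:
    pattern = f"results-mieb-final/linear_probe_{k_shot}/**/*.json"
    for path in glob.glob(pattern, recursive=True):
        with open(path) as f:
            res = json.load(f)
        # skip non-result files such as model metadata
        if "task_name" in res:
            print(k_shot, res["task_name"])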
30 changes: 30 additions & 0 deletions scripts/run_mieb_rerun_siglip.py
@@ -0,0 +1,30 @@
from __future__ import annotations

import mteb

for model_name in [
"google/siglip-so400m-patch14-384",
"google/siglip-base-patch16-256-multilingual",
"google/siglip-base-patch16-256",
"google/siglip-base-patch16-512",
"google/siglip-base-patch16-384",
"google/siglip-base-patch16-224",
"google/siglip-large-patch16-256",
"google/siglip-large-patch16-384",
]:
    model = mteb.get_model(model_name)
    tasks = mteb.get_tasks(
        task_types=[
            "Any2AnyRetrieval",
            "Any2AnyMultiChoice",
            "Any2TextMutipleChoice",
            "ImageClustering",
            "ImageClassification",
            "ImageMultilabelClassification",
            "ImageTextPairClassification",
            # "VisualSTS",  # VisualSTS does not need a rerun; its results are unchanged after the fix.
            "ZeroShotClassification",
        ]
    )
    evaluation = mteb.MTEB(tasks=tasks)
    results = evaluation.run(model, output_folder="results-mieb-final/siglip_rerun")
