Skip to content

Commit

Permalink
fix: Added the necessary trust_remote_code (#1406)
Browse files: browse the repository at this point in the history
  • Loading branch information
AlexeyVatolin authored Nov 7, 2024
1 parent a85c550 commit fd8b283
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 4 deletions.
8 changes: 8 additions & 0 deletions mteb/abstasks/AbsTaskRetrieval.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
qrels_file: str = "",
streaming: bool = False,
keep_in_memory: bool = False,
trust_remote_code: bool = False,
):
self.corpus = {}
self.queries = {}
Expand Down Expand Up @@ -63,6 +64,7 @@ def __init__(
self.qrels_file = qrels_file
self.streaming = streaming
self.keep_in_memory = keep_in_memory
self.trust_remote_code = trust_remote_code

@staticmethod
def check(fIn: str, ext: str):
Expand Down Expand Up @@ -125,6 +127,7 @@ def _load_corpus(self):
"corpus",
keep_in_memory=self.keep_in_memory,
streaming=self.streaming,
trust_remote_code=self.trust_remote_code,
)
else:
corpus_ds = load_dataset(
Expand Down Expand Up @@ -152,6 +155,7 @@ def _load_queries(self):
"queries",
keep_in_memory=self.keep_in_memory,
streaming=self.streaming,
trust_remote_code=self.trust_remote_code,
)
else:
queries_ds = load_dataset(
Expand All @@ -174,6 +178,7 @@ def _load_qrels(self, split):
self.hf_repo_qrels,
keep_in_memory=self.keep_in_memory,
streaming=self.streaming,
trust_remote_code=self.trust_remote_code,
)[split]
else:
qrels_ds = load_dataset(
Expand Down Expand Up @@ -254,6 +259,9 @@ def load_data(self, **kwargs):
hf_repo_qrels=hf_repo_qrels,
streaming=False,
keep_in_memory=False,
trust_remote_code=self.metadata_dict["dataset"].get(
"trust_remote_code", False
),
).load(split=split)
# Conversion from DataSet
queries = {query["id"]: query["text"] for query in queries}
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/PairClassification/multilingual/XStance.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,7 +58,7 @@ def load_data(self, **kwargs):
self.dataset = {}
path = self.metadata_dict["dataset"]["path"]
revision = self.metadata_dict["dataset"]["revision"]
raw_dataset = load_dataset(path, revision=revision)
raw_dataset = load_dataset(path, revision=revision, trust_remote_code=True)

def convert_example(example):
return {
Expand Down
7 changes: 5 additions & 2 deletions mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,7 +38,11 @@ def load_mldr_data(

for lang in langs:
lang_corpus = datasets.load_dataset(
path, f"corpus-{lang}", cache_dir=cache_dir, revision=revision
path,
f"corpus-{lang}",
cache_dir=cache_dir,
revision=revision,
trust_remote_code=True,
)["corpus"]
lang_corpus = {e["docid"]: {"text": e["text"]} for e in lang_corpus}
lang_data = datasets.load_dataset(path, lang, cache_dir=cache_dir)
Expand All @@ -65,7 +69,6 @@ class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval):
dataset={
"path": "Shitao/MLDR",
"revision": "d67138e705d963e346253a80e59676ddb418810a",
"trust_remote_code": True,
},
type="Retrieval",
category="s2p",
Expand Down
4 changes: 3 additions & 1 deletion mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,6 +30,7 @@ def _load_xmarket_data(
languages=[lang],
split=split,
cache_dir=cache_dir,
trust_remote_code=True,
)
query_rows = datasets.load_dataset(
path,
Expand All @@ -38,6 +39,7 @@ def _load_xmarket_data(
revision=revision,
split=split,
cache_dir=cache_dir,
trust_remote_code=True,
)
qrels_rows = datasets.load_dataset(
path,
Expand All @@ -46,6 +48,7 @@ def _load_xmarket_data(
revision=revision,
split=split,
cache_dir=cache_dir,
trust_remote_code=True,
)

corpus[lang][split] = {row["_id"]: row for row in corpus_rows}
Expand All @@ -69,7 +72,6 @@ class XMarket(MultilingualTask, AbsTaskRetrieval):
dataset={
"path": "jinaai/xmarket_ml",
"revision": "dfe57acff5b62c23732a7b7d3e3fb84ff501708b",
"trust_remote_code": True,
},
type="Retrieval",
category="s2p",
Expand Down

0 comments on commit fd8b283

Please sign in to comment.