We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
An error occurs when loading SICK-BR-STS and XStance tasks.
SICK-BR-STS
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 task.load_data()

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/mteb/abstasks/AbsTask.py:189, in AbsTask.load_data(self, **kwargs)
    187     return
    188 self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"])  # type: ignore
--> 189 self.dataset_transform()
    190 self.data_loaded = True

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/mteb/tasks/STS/por/SickBrSTS.py:66, in SickBrSTS.dataset_transform(self)
     62 def dataset_transform(self):
     63     for split in self.dataset:
     64         self.dataset.update(
     65             {
---> 66                 split: self.dataset[split].train_test_split(
     67                     test_size=N_SAMPLES, seed=self.seed, label="entailment_label"
     68                 )["test"]
     69             }
     70         )
     72     self.dataset = self.dataset.rename_columns(
     73         {
     74             "sentence_A": "sentence1",
   (...)
     78         }
     79     )

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/arrow_dataset.py:567, in transmit_format.<locals>.wrapper(*args, **kwargs)
    560 self_format = {
    561     "type": self._format_type,
    562     "format_kwargs": self._format_kwargs,
    563     "columns": self._format_columns,
    564     "output_all_columns": self._output_all_columns,
    565 }
    566 # apply actual function
--> 567 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
    568 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
    569 # re-apply format to the output

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/fingerprint.py:482, in fingerprint_transform.<locals>._fingerprint.<locals>.wrapper(*args, **kwargs)
    478     validate_fingerprint(kwargs[fingerprint_name])
    480 # Call actual function
--> 482 out = func(dataset, *args, **kwargs)
    484 # Update fingerprint of in-place transforms + update in-place history of transforms
    486 if inplace:  # update after calling func so that the fingerprint doesn't change if the function fails

TypeError: Dataset.train_test_split() got an unexpected keyword argument 'label'
XStance
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[7], line 1
----> 1 task.load_data()

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/mteb/tasks/PairClassification/multilingual/XStance.py:91, in XStance.load_data(self, **kwargs)
     85 # convert examples
     86 self.dataset[lang][split] = self.dataset[lang][split].map(
     87     convert_example,
     88     remove_columns=self.dataset[lang][split].column_names,
     89 )
---> 91 self.dataset_transform()
     92 self.data_loaded = True

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/mteb/tasks/PairClassification/multilingual/XStance.py:103, in XStance.dataset_transform(self)
     99 _dataset[lang] = {}
    100 for split in self.metadata.eval_splits:
    101     _dataset[lang][split] = [
    102         {
--> 103             "sent1": self.dataset[lang][split]["sent1"],
    104             "sent2": self.dataset[lang][split]["sent2"],
    105             "labels": self.dataset[lang][split]["labels"],
    106         }
    107     ]
    108 self.dataset = _dataset

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/arrow_dataset.py:2861, in Dataset.__getitem__(self, key)
   2859 def __getitem__(self, key):  # noqa: F811
   2860     """Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools)."""
-> 2861     return self._getitem(key)

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/arrow_dataset.py:2845, in Dataset._getitem(self, key, **kwargs)
   2843 format_kwargs = format_kwargs if format_kwargs is not None else {}
   2844 formatter = get_formatter(format_type, features=self._info.features, **format_kwargs)
-> 2845 pa_subtable = query_table(self._data, key, indices=self._indices)
   2846 formatted_output = format_table(
   2847     pa_subtable, key, formatter=formatter, format_columns=format_columns, output_all_columns=output_all_columns
   2848 )
   2849 return formatted_output

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/formatting/formatting.py:584, in query_table(table, key, indices)
    582     _raise_bad_key_type(key)
    583 if isinstance(key, str):
--> 584     _check_valid_column_key(key, table.column_names)
    585 else:
    586     size = indices.num_rows if indices is not None else table.num_rows

File ~/.pyenv/versions/3.11.2/lib/python3.11/site-packages/datasets/formatting/formatting.py:521, in _check_valid_column_key(key, columns)
    519 def _check_valid_column_key(key: str, columns: List[str]) -> None:
    520     if key not in columns:
--> 521         raise KeyError(f"Column {key} not in the dataset. Current columns in the dataset: {columns}")

KeyError: "Column sent1 not in the dataset. Current columns in the dataset: ['sentence1', 'sentence2', 'labels']"
Example to reproduce the error:
import mteb
task = mteb.get_task("SICK-BR-STS")  # or XStance
task.load_data()
The text was updated successfully, but these errors were encountered:
No branches or pull requests
An error occurs when loading SICK-BR-STS and XStance tasks.
SICK-BR-STS
XStance
Example to reproduce the error:
The text was updated successfully, but these errors were encountered: