AFQMC |
['cmn'] |
STS |
s2s |
|
None |
None |
AILACasedocs |
['eng'] |
Retrieval |
p2p |
[Legal, Written] |
None |
None |
AILAStatutes |
['eng'] |
Retrieval |
p2p |
[Legal, Written] |
None |
None |
AJGT (Alomari et al., 2017) |
['ara'] |
Classification |
s2s |
[Social, Written] |
None |
None |
ARCChallenge (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
ATEC |
['cmn'] |
STS |
s2s |
|
None |
None |
AfriSentiClassification |
['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] |
Classification |
s2s |
[Social, Written] |
None |
None |
AfriSentiLangClassification |
['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] |
Classification |
s2s |
[Social, Written] |
None |
None |
AllegroReviews |
['pol'] |
Classification |
s2s |
|
None |
None |
AlloProfClusteringP2P.v2 (Lefebvre-Brossard et al., 2023) |
['fra'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
None |
None |
AlloProfClusteringS2S.v2 (Lefebvre-Brossard et al., 2023) |
['fra'] |
Clustering |
s2s |
[Encyclopaedic, Written] |
None |
None |
AlloprofReranking (Lefebvre-Brossard et al., 2023) |
['fra'] |
Reranking |
s2p |
[Web, Academic, Written] |
None |
None |
AlloprofRetrieval (Lefebvre-Brossard et al., 2023) |
['fra'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
AlphaNLI (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
AmazonCounterfactualClassification |
['deu', 'eng', 'jpn'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
AmazonPolarityClassification (Julian McAuley, 2013) |
['eng'] |
Classification |
p2p |
[Reviews, Written] |
None |
None |
AmazonReviewsClassification (Phillip Keung, 2020) |
['cmn', 'deu', 'eng', 'fra', 'jpn', 'spa'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
AngryTweetsClassification (Pauli et al., 2021) |
['dan'] |
Classification |
s2s |
[Social, Written] |
None |
None |
AppsRetrieval (Dan Hendrycks, 2021) |
['eng', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 12530} |
{'test': {'number_of_characters': 2245.84, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'average_document_length': 0.07, 'average_query_length': 0.44, 'average_relevant_docs_per_query': 1.0}} |
ArEntail (Obeidat et al., 2024) |
['ara'] |
PairClassification |
s2s |
[News, Written] |
None |
None |
ArXivHierarchicalClusteringP2P |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'average_text_length': 1008.44, 'average_labels_per_text': 1.46, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 
'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} |
ArXivHierarchicalClusteringS2S |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
None |
None |
ArguAna (Boteva et al., 2016) |
['eng'] |
Retrieval |
s2p |
[Medical, Written] |
None |
None |
ArguAna-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
ArmenianParaphrasePC (Arthur Malajyan, 2020) |
['hye'] |
PairClassification |
s2s |
[News, Written] |
None |
None |
ArxivClassification (He et al., 2019) |
['eng'] |
Classification |
s2s |
[Academic, Written] |
None |
None |
AskUbuntuDupQuestions |
['eng'] |
Reranking |
s2s |
|
{'test': 375} |
{'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'avg_query_len': 50.21, 'avg_positive_len': 52.54, 'avg_negative_len': 52.69}} |
Assin2RTE (Real et al., 2020) |
['por'] |
PairClassification |
s2s |
[Written] |
None |
None |
Assin2STS (Real et al., 2020) |
['por'] |
STS |
s2s |
[Written] |
None |
None |
BIOSSES (Soğancıoğlu et al., 2017) |
['eng'] |
STS |
s2s |
|
None |
None |
BQ (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
BSARDRetrieval (Louis et al., 2022) |
['fra'] |
Retrieval |
s2p |
[Legal, Spoken] |
None |
None |
BUCC.v2 |
['cmn', 'deu', 'eng', 'fra', 'rus'] |
BitextMining |
s2s |
[Written] |
None |
None |
Banking77Classification |
['eng'] |
Classification |
s2s |
[Written] |
None |
None |
BelebeleRetrieval (Lucas Bandarkar, 2023) |
['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] |
Retrieval |
s2p |
[Web, News, Written] |
{'test': 521866} |
{'test': {'number_of_characters': 76.5, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'average_document_length': 0.0, 'average_query_length': 0.0, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'acm_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-acm_Arab': {'number_of_characters': 57.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-afr_Latn': {'number_of_characters': 80.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 
'eng_Latn-als_Latn': {'number_of_characters': 78.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-amh_Ethi': {'number_of_characters': 51.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-apc_Arab': {'number_of_characters': 57.86, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Arab': 
{'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ars_Arab': {'number_of_characters': 58.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-ary_Arab': {'number_of_characters': 68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ary_Arab': {'number_of_characters': 
68.02, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arz_Arab': {'number_of_characters': 59.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-asm_Beng': {'number_of_characters': 70.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-azj_Latn': {'number_of_characters': 75.51, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bam_Latn': {'number_of_characters': 74.34, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bod_Tibt': {'number_of_characters': 88.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 74.89, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-cat_Latn': {'number_of_characters': 77.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ceb_Latn': {'number_of_characters': 83.2, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ces_Latn': {'number_of_characters': 69.73, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ckb_Arab': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 
0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-dan_Latn': {'number_of_characters': 74.97, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-deu_Latn': {'number_of_characters': 77.32, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ell_Grek': {'number_of_characters': 88.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 
0.1, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-est_Latn': {'number_of_characters': 69.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eus_Latn': {'number_of_characters': 76.45, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fin_Latn': {'number_of_characters': 74.51, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fra_Latn': {'number_of_characters': 92.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fuv_Latn': {'number_of_characters': 60.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-gaz_Latn': {'number_of_characters': 87.93, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-grn_Latn': {'number_of_characters': 77.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-guj_Gujr': {'number_of_characters': 64.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hat_Latn': {'number_of_characters': 72.65, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hau_Latn': {'number_of_characters': 87.85, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-heb_Hebr': {'number_of_characters': 57.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hrv_Latn': {'number_of_characters': 70.84, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hun_Latn': {'number_of_characters': 76.41, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hye_Armn': {'number_of_characters': 77.43, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ibo_Latn': {'number_of_characters': 74.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ilo_Latn': {'number_of_characters': 87.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ind_Latn': {'number_of_characters': 84.11, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-isl_Latn': {'number_of_characters': 79.27, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ita_Latn': {'number_of_characters': 85.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jav_Latn': {'number_of_characters': 80.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 37.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kac_Latn': {'number_of_characters': 100.64, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kan_Knda': {'number_of_characters': 74.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kat_Geor': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 72.76, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kea_Latn': {'number_of_characters': 77.94, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 75.33, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khm_Khmr': {'number_of_characters': 77.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kin_Latn': {'number_of_characters': 81.9, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 76.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kor_Hang': {'number_of_characters': 37.26, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lao_Laoo': {'number_of_characters': 65.31, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lin_Latn': {'number_of_characters': 83.57, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lit_Latn': {'number_of_characters': 70.7, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lug_Latn': {'number_of_characters': 80.52, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-luo_Latn': {'number_of_characters': 75.14, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lvs_Latn': {'number_of_characters': 71.98, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mal_Mlym': {'number_of_characters': 82.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mar_Deva': {'number_of_characters': 70.63, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mkd_Cyrl': {'number_of_characters': 76.01, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mlt_Latn': {'number_of_characters': 77.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mri_Latn': {'number_of_characters': 83.71, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mya_Mymr': {'number_of_characters': 91.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nld_Latn': {'number_of_characters': 77.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nob_Latn': {'number_of_characters': 73.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nso_Latn': {'number_of_characters': 88.77, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 
1.0}, 'eng_Latn-nya_Latn': {'number_of_characters': 92.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ory_Orya': {'number_of_characters': 74.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pan_Guru': {'number_of_characters': 75.3, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-pbt_Arab': {'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pbt_Arab': 
{'number_of_characters': 69.67, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pes_Arab': {'number_of_characters': 66.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-plt_Latn': {'number_of_characters': 97.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-pol_Latn': {'number_of_characters': 76.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pol_Latn': {'number_of_characters': 76.1, 
'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-por_Latn': {'number_of_characters': 80.12, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ron_Latn': {'number_of_characters': 80.74, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 85.16, 'num_samples': 1388, 
'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-shn_Mymr': {'number_of_characters': 77.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 
'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slk_Latn': {'number_of_characters': 70.54, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slv_Latn': {'number_of_characters': 70.8, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sna_Latn': {'number_of_characters': 83.31, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 
'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-snd_Arab': {'number_of_characters': 65.42, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-som_Latn': {'number_of_characters': 92.96, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sot_Latn': {'number_of_characters': 85.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 
0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-spa_Latn': {'number_of_characters': 84.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 69.5, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ssw_Latn': {'number_of_characters': 83.1, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 
0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sun_Latn': {'number_of_characters': 80.16, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swe_Latn': {'number_of_characters': 70.68, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swh_Latn': {'number_of_characters': 82.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'tam_Taml-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tam_Taml': {'number_of_characters': 83.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tel_Telu': {'number_of_characters': 74.19, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 76.28, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgl_Latn': {'number_of_characters': 84.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tha_Thai': {'number_of_characters': 61.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tir_Ethi': {'number_of_characters': 54.0, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.06, 
'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tsn_Latn': {'number_of_characters': 89.13, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tso_Latn': {'number_of_characters': 93.69, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tur_Latn': {'number_of_characters': 73.56, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 74.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-uzn_Latn': {'number_of_characters': 79.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-vie_Latn': {'number_of_characters': 75.05, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-war_Latn': {'number_of_characters': 88.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 
'average_relevant_docs_per_query': 1.0}, 'wol_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-wol_Latn': {'number_of_characters': 72.61, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-xho_Latn': {'number_of_characters': 80.5, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-yor_Latn': {'number_of_characters': 70.64, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'zho_Hans-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hans': {'number_of_characters': 23.75, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hant': {'number_of_characters': 23.08, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zsm_Latn': {'number_of_characters': 80.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 
'average_relevant_docs_per_query': 1.0}, 'zul_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-eng_Latn': {'number_of_characters': 79.35, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.09, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zul_Latn': {'number_of_characters': 78.04, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Latn': {'number_of_characters': 69.02, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Arab': {'number_of_characters': 60.55, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Latn': {'number_of_characters': 76.79, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Beng': {'number_of_characters': 71.48, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Latn': {'number_of_characters': 76.81, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Deva': {'number_of_characters': 74.62, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 
'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Latn': {'number_of_characters': 73.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Deva': {'number_of_characters': 68.9, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Latn': {'number_of_characters': 96.47, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Sinh': {'number_of_characters': 71.92, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Latn': {'number_of_characters': 92.07, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Arab': {'number_of_characters': 72.53, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'average_document_length': 0.0, 'average_query_length': 0.08, 'average_relevant_docs_per_query': 1.0}}}} |
BengaliDocumentClassification |
['ben'] |
Classification |
s2s |
[News, Written] |
None |
None |
BengaliHateSpeechClassification (Karim et al., 2020) |
['ben'] |
Classification |
s2s |
[News, Written] |
None |
None |
BengaliSentimentAnalysis (Sazzed et al., 2020) |
['ben'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
BibleNLPBitextMining (Akerman et al., 2023) |
['aai', 'aak', 'aau', 'aaz', 'abt', 'abx', 'aby', 'acf', 'acr', 'acu', 'adz', 'aer', 'aey', 'agd', 'agg', 'agm', 'agn', 'agr', 'agt', 'agu', 'aia', 'aii', 'aka', 'ake', 'alp', 'alq', 'als', 'aly', 'ame', 'amf', 'amk', 'amm', 'amn', 'amo', 'amp', 'amr', 'amu', 'amx', 'anh', 'anv', 'aoi', 'aoj', 'aom', 'aon', 'apb', 'ape', 'apn', 'apr', 'apu', 'apw', 'apz', 'arb', 'are', 'arl', 'arn', 'arp', 'asm', 'aso', 'ata', 'atb', 'atd', 'atg', 'att', 'auc', 'aui', 'auy', 'avt', 'awb', 'awk', 'awx', 'azb', 'azg', 'azz', 'bao', 'bba', 'bbb', 'bbr', 'bch', 'bco', 'bdd', 'bea', 'bef', 'bel', 'ben', 'beo', 'beu', 'bgs', 'bgt', 'bhg', 'bhl', 'big', 'bjk', 'bjp', 'bjr', 'bjv', 'bjz', 'bkd', 'bki', 'bkq', 'bkx', 'blw', 'blz', 'bmh', 'bmk', 'bmr', 'bmu', 'bnp', 'boa', 'boj', 'bon', 'box', 'bpr', 'bps', 'bqc', 'bqp', 'bre', 'bsj', 'bsn', 'bsp', 'bss', 'buk', 'bus', 'bvd', 'bvr', 'bxh', 'byr', 'byx', 'bzd', 'bzh', 'bzj', 'caa', 'cab', 'cac', 'caf', 'cak', 'cao', 'cap', 'car', 'cav', 'cax', 'cbc', 'cbi', 'cbk', 'cbr', 'cbs', 'cbt', 'cbu', 'cbv', 'cco', 'ceb', 'cek', 'ces', 'cgc', 'cha', 'chd', 'chf', 'chk', 'chq', 'chz', 'cjo', 'cjv', 'ckb', 'cle', 'clu', 'cme', 'cmn', 'cni', 'cnl', 'cnt', 'cof', 'con', 'cop', 'cot', 'cpa', 'cpb', 'cpc', 'cpu', 'cpy', 'crn', 'crx', 'cso', 'csy', 'cta', 'cth', 'ctp', 'ctu', 'cub', 'cuc', 'cui', 'cuk', 'cut', 'cux', 'cwe', 'cya', 'daa', 'dad', 'dah', 'dan', 'ded', 'deu', 'dgc', 'dgr', 'dgz', 'dhg', 'dif', 'dik', 'dji', 'djk', 'djr', 'dob', 'dop', 'dov', 'dwr', 'dww', 'dwy', 'ebk', 'eko', 'emi', 'emp', 'eng', 'enq', 'epo', 'eri', 'ese', 'esk', 'etr', 'ewe', 'faa', 'fai', 'far', 'ffm', 'for', 'fra', 'fue', 'fuf', 'fuh', 'gah', 'gai', 'gam', 'gaw', 'gdn', 'gdr', 'geb', 'gfk', 'ghs', 'glk', 'gmv', 'gng', 'gnn', 'gnw', 'gof', 'grc', 'gub', 'guh', 'gui', 'guj', 'gul', 'gum', 'gun', 'guo', 'gup', 'gux', 'gvc', 'gvf', 'gvn', 'gvs', 'gwi', 'gym', 'gyr', 'hat', 'hau', 'haw', 'hbo', 'hch', 'heb', 'heg', 'hin', 'hix', 'hla', 'hlt', 'hmo', 'hns', 'hop', 'hot', 'hrv', 
'hto', 'hub', 'hui', 'hun', 'hus', 'huu', 'huv', 'hvn', 'ian', 'ign', 'ikk', 'ikw', 'ilo', 'imo', 'inb', 'ind', 'ino', 'iou', 'ipi', 'isn', 'ita', 'iws', 'ixl', 'jac', 'jae', 'jao', 'jic', 'jid', 'jiv', 'jni', 'jpn', 'jvn', 'kan', 'kaq', 'kbc', 'kbh', 'kbm', 'kbq', 'kdc', 'kde', 'kdl', 'kek', 'ken', 'kew', 'kgf', 'kgk', 'kgp', 'khs', 'khz', 'kik', 'kiw', 'kiz', 'kje', 'kjs', 'kkc', 'kkl', 'klt', 'klv', 'kmg', 'kmh', 'kmk', 'kmo', 'kms', 'kmu', 'kne', 'knf', 'knj', 'knv', 'kos', 'kpf', 'kpg', 'kpj', 'kpr', 'kpw', 'kpx', 'kqa', 'kqc', 'kqf', 'kql', 'kqw', 'ksd', 'ksj', 'ksr', 'ktm', 'kto', 'kud', 'kue', 'kup', 'kvg', 'kvn', 'kwd', 'kwf', 'kwi', 'kwj', 'kyc', 'kyf', 'kyg', 'kyq', 'kyz', 'kze', 'lac', 'lat', 'lbb', 'lbk', 'lcm', 'leu', 'lex', 'lgl', 'lid', 'lif', 'lin', 'lit', 'llg', 'lug', 'luo', 'lww', 'maa', 'maj', 'mal', 'mam', 'maq', 'mar', 'mau', 'mav', 'maz', 'mbb', 'mbc', 'mbh', 'mbj', 'mbl', 'mbs', 'mbt', 'mca', 'mcb', 'mcd', 'mcf', 'mco', 'mcp', 'mcq', 'mcr', 'mdy', 'med', 'mee', 'mek', 'meq', 'met', 'meu', 'mgc', 'mgh', 'mgw', 'mhl', 'mib', 'mic', 'mie', 'mig', 'mih', 'mil', 'mio', 'mir', 'mit', 'miz', 'mjc', 'mkj', 'mkl', 'mkn', 'mks', 'mle', 'mlh', 'mlp', 'mmo', 'mmx', 'mna', 'mop', 'mox', 'mph', 'mpj', 'mpm', 'mpp', 'mps', 'mpt', 'mpx', 'mqb', 'mqj', 'msb', 'msc', 'msk', 'msm', 'msy', 'mti', 'mto', 'mux', 'muy', 'mva', 'mvn', 'mwc', 'mwe', 'mwf', 'mwp', 'mxb', 'mxp', 'mxq', 'mxt', 'mya', 'myk', 'myu', 'myw', 'myy', 'mzz', 'nab', 'naf', 'nak', 'nas', 'nbq', 'nca', 'nch', 'ncj', 'ncl', 'ncu', 'ndg', 'ndj', 'nfa', 'ngp', 'ngu', 'nhe', 'nhg', 'nhi', 'nho', 'nhr', 'nhu', 'nhw', 'nhy', 'nif', 'nii', 'nin', 'nko', 'nld', 'nlg', 'nna', 'nnq', 'noa', 'nop', 'not', 'nou', 'npi', 'npl', 'nsn', 'nss', 'ntj', 'ntp', 'ntu', 'nuy', 'nvm', 'nwi', 'nya', 'nys', 'nyu', 'obo', 'okv', 'omw', 'ong', 'ons', 'ood', 'opm', 'ory', 'ote', 'otm', 'otn', 'otq', 'ots', 'pab', 'pad', 'pah', 'pan', 'pao', 'pes', 'pib', 'pio', 'pir', 'piu', 'pjt', 'pls', 'plu', 'pma', 'poe', 'poh', 
'poi', 'pol', 'pon', 'por', 'poy', 'ppo', 'prf', 'pri', 'ptp', 'ptu', 'pwg', 'qub', 'quc', 'quf', 'quh', 'qul', 'qup', 'qvc', 'qve', 'qvh', 'qvm', 'qvn', 'qvs', 'qvw', 'qvz', 'qwh', 'qxh', 'qxn', 'qxo', 'rai', 'reg', 'rgu', 'rkb', 'rmc', 'rmy', 'ron', 'roo', 'rop', 'row', 'rro', 'ruf', 'rug', 'rus', 'rwo', 'sab', 'san', 'sbe', 'sbk', 'sbs', 'seh', 'sey', 'sgb', 'sgz', 'shj', 'shp', 'sim', 'sja', 'sll', 'smk', 'snc', 'snn', 'snp', 'snx', 'sny', 'som', 'soq', 'soy', 'spa', 'spl', 'spm', 'spp', 'sps', 'spy', 'sri', 'srm', 'srn', 'srp', 'srq', 'ssd', 'ssg', 'ssx', 'stp', 'sua', 'sue', 'sus', 'suz', 'swe', 'swh', 'swp', 'sxb', 'tac', 'taj', 'tam', 'tav', 'taw', 'tbc', 'tbf', 'tbg', 'tbo', 'tbz', 'tca', 'tcs', 'tcz', 'tdt', 'tee', 'tel', 'ter', 'tet', 'tew', 'tfr', 'tgk', 'tgl', 'tgo', 'tgp', 'tha', 'tif', 'tim', 'tiw', 'tiy', 'tke', 'tku', 'tlf', 'tmd', 'tna', 'tnc', 'tnk', 'tnn', 'tnp', 'toc', 'tod', 'tof', 'toj', 'ton', 'too', 'top', 'tos', 'tpa', 'tpi', 'tpt', 'tpz', 'trc', 'tsw', 'ttc', 'tte', 'tuc', 'tue', 'tuf', 'tuo', 'tur', 'tvk', 'twi', 'txq', 'txu', 'tzj', 'tzo', 'ubr', 'ubu', 'udu', 'uig', 'ukr', 'uli', 'ulk', 'upv', 'ura', 'urb', 'urd', 'uri', 'urt', 'urw', 'usa', 'usp', 'uvh', 'uvl', 'vid', 'vie', 'viv', 'vmy', 'waj', 'wal', 'wap', 'wat', 'wbi', 'wbp', 'wed', 'wer', 'wim', 'wiu', 'wiv', 'wmt', 'wmw', 'wnc', 'wnu', 'wol', 'wos', 'wrk', 'wro', 'wrs', 'wsk', 'wuv', 'xav', 'xbi', 'xed', 'xla', 'xnn', 'xon', 'xsi', 'xtd', 'xtm', 'yaa', 'yad', 'yal', 'yap', 'yaq', 'yby', 'ycn', 'yka', 'yle', 'yml', 'yon', 'yor', 'yrb', 'yre', 'yss', 'yuj', 'yut', 'yuw', 'yva', 'zaa', 'zab', 'zac', 'zad', 'zai', 'zaj', 'zam', 'zao', 'zap', 'zar', 'zas', 'zat', 'zav', 'zaw', 'zca', 'zga', 'zia', 'ziw', 'zlm', 'zos', 'zpc', 'zpl', 'zpm', 'zpo', 'zpq', 'zpu', 'zpv', 'zpz', 'zsr', 'ztq', 'zty', 'zyp'] |
BitextMining |
s2s |
[Religious, Written] |
None |
None |
BigPatentClustering.v2 (Eva Sharma and Chen Li and Lu Wang, 2019) |
['eng'] |
Clustering |
p2p |
[Legal, Written] |
None |
None |
BiorxivClusteringP2P.v2 |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
None |
None |
BiorxivClusteringS2S.v2 |
['eng'] |
Clustering |
s2s |
[Academic, Written] |
None |
None |
BlurbsClusteringP2P.v2 (Steffen Remus, 2019) |
['deu'] |
Clustering |
p2p |
[Fiction, Written] |
None |
None |
BlurbsClusteringS2S.v2 (Steffen Remus, 2019) |
['deu'] |
Clustering |
s2s |
[Fiction, Written] |
None |
None |
BornholmBitextMining |
['dan'] |
BitextMining |
s2s |
[Web, Social, Fiction, Written] |
{'test': 500} |
{'test': {'average_sentence1_length': 49.83, 'average_sentence2_length': 38.89, 'num_samples': 500, 'number_of_characters': 44361}} |
BrazilianToxicTweetsClassification (Joao Augusto Leite and Diego F. Silva and Kalina Bontcheva and Carolina Scarton, 2020) |
['por'] |
MultilabelClassification |
s2s |
[Constructed, Written] |
None |
None |
BrightRetrieval (Hongjin Su, 2024) |
['eng'] |
Retrieval |
s2p |
[Non-fiction] |
None |
None |
BulgarianStoreReviewSentimentClassfication (Georgieva-Trifonova et al., 2018) |
['bul'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
CBD |
['pol'] |
Classification |
s2s |
[Written, Social] |
None |
None |
CDSC-E |
['pol'] |
PairClassification |
s2s |
[Written] |
None |
None |
CDSC-R |
['pol'] |
STS |
s2s |
[Web, Written] |
None |
None |
CEDRClassification (Sboev et al., 2021) |
['rus'] |
MultilabelClassification |
s2s |
[Web, Social, Blog, Written] |
{'test': 1882} |
{'test': {'average_text_length': 91.21, 'number_of_characters': 171649, 'average_label_per_text': 0.62, 'num_samples': 1882, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}} |
CLSClusteringP2P.v2 (Yudong Li, 2022) |
['cmn'] |
Clustering |
p2p |
[Academic, Written] |
None |
None |
CLSClusteringS2S.v2 (Yudong Li, 2022) |
['cmn'] |
Clustering |
s2s |
[Academic, Written] |
None |
None |
CMedQAv1-reranking (Zhang et al., 2017) |
['cmn'] |
Reranking |
s2s |
[Medical, Written] |
None |
None |
CMedQAv2-reranking (S. Zhang, 2018) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
COIRCodeSearchNetRetrieval (Husain et al., 2019) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1056326} |
{'test': {'number_of_characters': 664.77, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 941.4, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 748.83, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'average_document_length': 0.0, 'average_query_length': 0.23, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 405.38, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 457.44, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'average_document_length': 0.0, 'average_query_length': 0.36, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 588.89, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 578.85, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}}}} |
CPUSpeedTask |
['eng'] |
Speed |
s2s |
[Fiction, Written] |
None |
None |
CQADupstackAndroidRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackEnglishRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackGamingRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackGisRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackMathematicaRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackPhysicsRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackProgrammersRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
[Programming, Written, Non-fiction] |
None |
None |
CQADupstackStatsRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackTexRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackUnixRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackWebmastersRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CQADupstackWordpressRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CSFDCZMovieReviewSentimentClassification (Michal Štefánik, 2023) |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
CSFDSKMovieReviewSentimentClassification (Michal Štefánik, 2023) |
['slk'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
CTKFactsNLI (Ullrich et al., 2023) |
['ces'] |
PairClassification |
s2s |
[News, Written] |
None |
None |
CUADAffiliateLicenseLicenseeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADAffiliateLicenseLicensorLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADAntiAssignmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADAuditRightsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADCapOnLiabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADChangeOfControlLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADCompetitiveRestrictionExceptionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADCovenantNotToSueLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADEffectiveDateLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADExclusivityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADExpirationDateLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADGoverningLawLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADIPOwnershipAssignmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADInsuranceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADIrrevocableOrPerpetualLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADJointIPOwnershipLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADLicenseGrantLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADLiquidatedDamagesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADMinimumCommitmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADMostFavoredNationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNoSolicitOfCustomersLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNoSolicitOfEmployeesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNonCompeteLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNonDisparagementLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNonTransferableLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADNoticePeriodToTerminateRenewalLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADPostTerminationServicesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADPriceRestrictionsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADRenewalTermLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADRevenueProfitSharingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADRofrRofoRofnLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADSourceCodeEscrowLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADTerminationForConvenienceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADThirdPartyBeneficiaryLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADUncappedLiabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADVolumeRestrictionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CUADWarrantyDurationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CanadaTaxCourtOutcomesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CataloniaTweetClassification |
['cat', 'spa'] |
Classification |
s2s |
[Social, Government, Written] |
None |
None |
ClimateFEVER (Thomas Diggelmann, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
ClimateFEVERHardNegatives (Thomas Diggelmann, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
CmedqaRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
Cmnli |
['cmn'] |
PairClassification |
s2s |
|
None |
None |
CodeEditSearchRetrieval (Niklas Muennighoff, 2023) |
['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] |
Retrieval |
p2p |
[Programming, Written] |
{'train': 26000} |
{'train': {'number_of_characters': 71.99, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70.52, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 57.88, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'typescript': {'number_of_characters': 61.09, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 71.8, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 67.9, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 63.98, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 62.93, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'c': {'number_of_characters': 98.59, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.1, 'average_relevant_docs_per_query': 1.0}, 'c++': {'number_of_characters': 115.48, 'num_samples': 2000, 'num_queries': 1000, 
'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.11, 'average_relevant_docs_per_query': 1.0}, 'rust': {'number_of_characters': 68.5, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}, 'swift': {'number_of_characters': 58.28, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'scala': {'number_of_characters': 65.83, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.06, 'average_relevant_docs_per_query': 1.0}, 'shell': {'number_of_characters': 73.06, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}}}} |
CodeFeedbackMT (Tianyu Zheng, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 79660} |
{'test': {'number_of_characters': 5894.4, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'average_document_length': 0.02, 'average_query_length': 0.33, 'average_relevant_docs_per_query': 1.0}} |
CodeFeedbackST (Xiangyang Li, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 187832} |
{'test': {'number_of_characters': 2246.58, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'average_document_length': 0.01, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}} |
CodeSearchNetCCRetrieval (Xiangyang Li, 2024) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1058035} |
{'test': {'number_of_characters': 390.06, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 553.79, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'average_document_length': 0.0, 'average_query_length': 0.04, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 445.71, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 235.77, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 268.87, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'average_document_length': 0.0, 'average_query_length': 0.21, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 344.53, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'average_document_length': 0.0, 'average_query_length': 0.03, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 338.62, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'average_document_length': 0.0, 'average_query_length': 0.02, 'average_relevant_docs_per_query': 1.0}}}} |
CodeSearchNetRetrieval (Husain et al., 2019) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 12000} |
{'test': {'number_of_characters': 325.01, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'average_document_length': 0.0, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467.55, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.47, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'number_of_characters': 187.02, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.19, 'average_relevant_docs_per_query': 1.0}, 'go': {'number_of_characters': 126.21, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.13, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'number_of_characters': 314.82, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.31, 'average_relevant_docs_per_query': 1.0}, 'java': {'number_of_characters': 691.36, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.69, 'average_relevant_docs_per_query': 1.0}, 'php': {'number_of_characters': 163.12, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'average_document_length': 0.0, 'average_query_length': 0.16, 'average_relevant_docs_per_query': 1.0}}}} |
CodeTransOceanContest (Weixiang Yan, 2023) |
['c++', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1229} |
{'test': {'number_of_characters': 2520.65, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'average_document_length': 1.5, 'average_query_length': 4.58, 'average_relevant_docs_per_query': 1.0}} |
CodeTransOceanDL (Weixiang Yan, 2023) |
['python'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 996} |
{'test': {'number_of_characters': 3347.7, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'average_document_length': 1.81, 'average_query_length': 10.38, 'average_relevant_docs_per_query': 1.0}} |
ContractNLIConfidentialityOfAgreementLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIExplicitIdentificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLILimitedUseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLINoLicensingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLINoticeOnCompelledDisclosureLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIPermissibleCopyLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLIReturnOfConfidentialInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLISharingWithEmployeesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLISharingWithThirdPartiesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ContractNLISurvivalOfObligationsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Core17InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
{'test': 19919} |
{'test': {'num_samples': 19919, 'num_docs': 19899, 'num_queries': 20, 'number_of_characters': 44450333, 'average_document_length': 2233.03, 'average_query_length': 109.75, 'average_instruction_length': 295.55, 'average_changed_instruction_length': 355.2, 'average_relevant_docs_per_query': 32.7, 'average_top_ranked_per_query': 1000.0}} |
CorporateLobbyingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
CosQA (Junjie Huang, 2021) |
['eng', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 21104} |
{'test': {'number_of_characters': 313.95, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'average_document_length': 0.01, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 1.0}} |
CovidRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
CrossLingualSemanticDiscriminationWMT19 |
['deu', 'fra'] |
Retrieval |
s2s |
[News, Written] |
None |
None |
CrossLingualSemanticDiscriminationWMT21 |
['deu', 'fra'] |
Retrieval |
s2s |
[News, Written] |
None |
None |
CyrillicTurkicLangClassification (Goldhahn et al., 2012) |
['bak', 'chv', 'kaz', 'kir', 'krc', 'rus', 'sah', 'tat', 'tyv'] |
Classification |
s2s |
[Web, Written] |
None |
None |
CzechProductReviewSentimentClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
CzechSoMeSentimentClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
CzechSubjectivityClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
DBPedia (Hasibi et al., 2017) |
['eng'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
None |
DBPedia-PL (Hasibi et al., 2017) |
['pol'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
None |
DBPedia-PLHardNegatives (Hasibi et al., 2017) |
['pol'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
None |
DBPediaHardNegatives (Hasibi et al., 2017) |
['eng'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
None |
DBpediaClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Encyclopaedic, Written] |
None |
None |
DKHateClassification |
['dan'] |
Classification |
s2s |
[Social, Written] |
None |
None |
DalajClassification |
['swe'] |
Classification |
s2s |
[Non-fiction, Written] |
None |
None |
DanFeverRetrieval |
['dan'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Spoken] |
None |
None |
DanishPoliticalCommentsClassification (Mads Guldborg Kjeldgaard Kongsbak, 2019) |
['dan'] |
Classification |
s2s |
[Social, Written] |
None |
None |
DefinitionClassificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
DiaBlaBitextMining (González et al., 2019) |
['eng', 'fra'] |
BitextMining |
s2s |
[Social, Written] |
None |
None |
Diversity1LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Diversity2LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Diversity3LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Diversity4LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Diversity5LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Diversity6LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
DuRetrieval (Yifu Qiu, 2022) |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
DutchBookReviewSentimentClassification (Benjamin et al., 2019) |
['nld'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
ESCIReranking (Chandan K. Reddy, 2022) |
['eng', 'jpn', 'spa'] |
Reranking |
s2p |
[Written] |
{'test': 29285} |
{'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'avg_query_len': 19.69, 'avg_positive_len': 803.92, 'avg_negative_len': 808.5, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'avg_query_len': 21.44, 'avg_positive_len': 868.37, 'avg_negative_len': 864.45}, 'es': {'num_samples': 3703, 'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'avg_query_len': 20.68, 'avg_positive_len': 980.96, 'avg_negative_len': 1023.22}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'avg_query_len': 10.15, 'avg_positive_len': 358.36, 'avg_negative_len': 388.08}}}} |
EcomRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
EightTagsClustering.v2 |
['pol'] |
Clustering |
s2s |
[Social, Written] |
None |
None |
EmotionClassification |
['eng'] |
Classification |
s2s |
[Social, Written] |
None |
None |
EstQA |
['est'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
EstonianValenceClassification |
['est'] |
Classification |
s2s |
[News, Written] |
None |
None |
FEVER |
['eng'] |
Retrieval |
s2p |
|
None |
None |
FEVERHardNegatives |
['eng'] |
Retrieval |
s2p |
|
None |
None |
FQuADRetrieval |
['fra'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
FaithDial (Dziri et al., 2022) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
FalseFriendsGermanEnglish |
['deu'] |
PairClassification |
s2s |
[Written] |
None |
None |
FaroeseSTS |
['fao'] |
STS |
s2s |
[News, Web, Written] |
None |
None |
FarsTail (Amirkhani et al., 2023) |
['fas'] |
PairClassification |
s2s |
[Academic, Written] |
None |
None |
FeedbackQARetrieval |
['eng'] |
Retrieval |
s2p |
[Web, Government, Medical, Written] |
None |
None |
FiQA-PL (Nandan Thakur, 2021) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
FiQA2018 (Nandan Thakur, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
FilipinoHateSpeechClassification (Neil Vicente Cabasag et al., 2019) |
['fil'] |
Classification |
s2s |
[Social, Written] |
None |
None |
FilipinoShopeeReviewsClassification |
['fil'] |
Classification |
s2s |
[Social, Written] |
None |
None |
FinParaSTS |
['fin'] |
STS |
s2s |
[News, Subtitles, Written] |
None |
None |
FinToxicityClassification |
['fin'] |
Classification |
s2s |
[News, Written] |
None |
None |
FinancialPhrasebankClassification (P. Malo, 2014) |
['eng'] |
Classification |
s2s |
[News, Written] |
None |
None |
FloresBitextMining (Goyal et al., 2022) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
BitextMining |
s2s |
[Non-fiction, Encyclopaedic, Written] |
None |
None |
FrenchBookReviews |
['fra'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
FrenkEnClassification (Nikola Ljubešić, 2019) |
['eng'] |
Classification |
s2s |
[Social, Written] |
None |
None |
FrenkHrClassification (Nikola Ljubešić, 2019) |
['hrv'] |
Classification |
s2s |
[Social, Written] |
None |
None |
FrenkSlClassification (Nikola Ljubešić, 2019) |
['slv'] |
Classification |
s2s |
[Social, Written] |
None |
None |
FunctionOfDecisionSectionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
GPUSpeedTask |
['eng'] |
Speed |
s2s |
[Fiction, Written] |
None |
None |
GeoreviewClassification |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
None |
None |
GeoreviewClusteringP2P |
['rus'] |
Clustering |
p2p |
[Reviews, Written] |
None |
None |
GeorgianFAQRetrieval |
['kat'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
GerDaLIR |
['deu'] |
Retrieval |
s2p |
|
None |
None |
GerDaLIRSmall |
['deu'] |
Retrieval |
p2p |
[Legal, Written] |
None |
None |
GermanDPR (Timo Möller, 2021) |
['deu'] |
Retrieval |
s2p |
|
None |
None |
GermanGovServiceRetrieval |
['deu'] |
Retrieval |
s2p |
[Government, Written] |
None |
None |
GermanPoliticiansTwitterSentimentClassification |
['deu'] |
Classification |
s2s |
[Social, Government, Written] |
None |
None |
GermanQuAD-Retrieval (Timo Möller, 2021) |
['deu'] |
Retrieval |
s2p |
|
None |
None |
GermanSTSBenchmark (Philip May, 2021) |
['deu'] |
STS |
s2s |
|
None |
None |
GreekCivicsQA |
['ell'] |
Retrieval |
s2p |
[Academic, Written] |
None |
None |
GreekLegalCodeClassification |
['ell'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
GujaratiNewsClassification |
['guj'] |
Classification |
s2s |
[News, Written] |
None |
None |
HALClusteringS2S.v2 (Mathieu Ciancone, 2024) |
['fra'] |
Clustering |
s2s |
[Academic, Written] |
None |
None |
HagridRetrieval (Ehsan Kamalloo, 2023) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
HateSpeechPortugueseClassification |
['por'] |
Classification |
s2s |
[Social, Written] |
None |
None |
HeadlineClassification |
['rus'] |
Classification |
s2s |
[News, Written] |
None |
None |
HebrewSentimentAnalysis |
['heb'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
HellaSwag (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
HinDialectClassification (Bafna et al., 2022) |
['anp', 'awa', 'ben', 'bgc', 'bhb', 'bhd', 'bho', 'bjj', 'bns', 'bra', 'gbm', 'guj', 'hne', 'kfg', 'kfy', 'mag', 'mar', 'mup', 'noe', 'pan', 'raj'] |
Classification |
s2s |
[Social, Spoken, Written] |
None |
None |
HindiDiscourseClassification |
['hin'] |
Classification |
s2s |
[Fiction, Social, Written] |
None |
None |
HotelReviewSentimentClassification (Elnagar et al., 2018) |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
HotpotQA |
['eng'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
HotpotQA-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
HotpotQA-PLHardNegatives (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
HotpotQAHardNegatives |
['eng'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
HunSum2AbstractiveRetrieval (Botond Barta, 2024) |
['hun'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
IFlyTek |
['cmn'] |
Classification |
s2s |
|
None |
None |
IN22ConvBitextMining (Jay Gala, 2023) |
['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Social, Spoken, Fiction, Spoken] |
{'test': 760518} |
{'test': {'average_sentence1_length': 54.33, 'average_sentence2_length': 54.33, 'num_samples': 760518, 'number_of_characters': 82637104, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155988}, 'asm_Beng-brx_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162044}, 'asm_Beng-doi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167032}, 'asm_Beng-eng_Latn': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160716}, 'asm_Beng-gom_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156282}, 'asm_Beng-guj_Gujr': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158269}, 'asm_Beng-hin_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159964}, 'asm_Beng-kan_Knda': {'average_sentence1_length': 53.75, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 165177}, 'asm_Beng-kas_Arab': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164681}, 'asm_Beng-mai_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162408}, 'asm_Beng-mal_Mlym': {'average_sentence1_length': 53.75, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172838}, 'asm_Beng-mar_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162747}, 'asm_Beng-mni_Mtei': {'average_sentence1_length': 53.75, 
'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157316}, 'asm_Beng-npi_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160906}, 'asm_Beng-ory_Orya': {'average_sentence1_length': 53.75, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164223}, 'asm_Beng-pan_Guru': {'average_sentence1_length': 53.75, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160201}, 'asm_Beng-san_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158093}, 'asm_Beng-sat_Olck': {'average_sentence1_length': 53.75, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169379}, 'asm_Beng-snd_Deva': {'average_sentence1_length': 53.75, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162623}, 'asm_Beng-tam_Taml': {'average_sentence1_length': 53.75, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174866}, 'asm_Beng-tel_Telu': {'average_sentence1_length': 53.75, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157690}, 'asm_Beng-urd_Arab': {'average_sentence1_length': 53.75, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161305}, 'ben_Beng-asm_Beng': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 155988}, 'ben_Beng-brx_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156448}, 'ben_Beng-doi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161436}, 'ben_Beng-eng_Latn': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155120}, 
'ben_Beng-gom_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 150686}, 'ben_Beng-guj_Gujr': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152673}, 'ben_Beng-hin_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154368}, 'ben_Beng-kan_Knda': {'average_sentence1_length': 50.03, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159581}, 'ben_Beng-kas_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159085}, 'ben_Beng-mai_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 156812}, 'ben_Beng-mal_Mlym': {'average_sentence1_length': 50.03, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167242}, 'ben_Beng-mar_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157151}, 'ben_Beng-mni_Mtei': {'average_sentence1_length': 50.03, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 151720}, 'ben_Beng-npi_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155310}, 'ben_Beng-ory_Orya': {'average_sentence1_length': 50.03, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158627}, 'ben_Beng-pan_Guru': {'average_sentence1_length': 50.03, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154605}, 'ben_Beng-san_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152497}, 'ben_Beng-sat_Olck': {'average_sentence1_length': 50.03, 'average_sentence2_length': 58.94, 
'num_samples': 1503, 'number_of_characters': 163783}, 'ben_Beng-snd_Deva': {'average_sentence1_length': 50.03, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157027}, 'ben_Beng-tam_Taml': {'average_sentence1_length': 50.03, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169270}, 'ben_Beng-tel_Telu': {'average_sentence1_length': 50.03, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 152094}, 'ben_Beng-urd_Arab': {'average_sentence1_length': 50.03, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 155709}, 'brx_Deva-asm_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162044}, 'brx_Deva-ben_Beng': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156448}, 'brx_Deva-doi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167492}, 'brx_Deva-eng_Latn': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161176}, 'brx_Deva-gom_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156742}, 'brx_Deva-guj_Gujr': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 158729}, 'brx_Deva-hin_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160424}, 'brx_Deva-kan_Knda': {'average_sentence1_length': 54.06, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 165637}, 'brx_Deva-kas_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165141}, 'brx_Deva-mai_Deva': 
{'average_sentence1_length': 54.06, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162868}, 'brx_Deva-mal_Mlym': {'average_sentence1_length': 54.06, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173298}, 'brx_Deva-mar_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163207}, 'brx_Deva-mni_Mtei': {'average_sentence1_length': 54.06, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157776}, 'brx_Deva-npi_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161366}, 'brx_Deva-ory_Orya': {'average_sentence1_length': 54.06, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 164683}, 'brx_Deva-pan_Guru': {'average_sentence1_length': 54.06, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 160661}, 'brx_Deva-san_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158553}, 'brx_Deva-sat_Olck': {'average_sentence1_length': 54.06, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169839}, 'brx_Deva-snd_Deva': {'average_sentence1_length': 54.06, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163083}, 'brx_Deva-tam_Taml': {'average_sentence1_length': 54.06, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175326}, 'brx_Deva-tel_Telu': {'average_sentence1_length': 54.06, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158150}, 'brx_Deva-urd_Arab': {'average_sentence1_length': 54.06, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 161765}, 'doi_Deva-asm_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.75, 'num_samples': 1503, 
'number_of_characters': 167032}, 'doi_Deva-ben_Beng': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 161436}, 'doi_Deva-brx_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 167492}, 'doi_Deva-eng_Latn': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 166164}, 'doi_Deva-gom_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 161730}, 'doi_Deva-guj_Gujr': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 163717}, 'doi_Deva-hin_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 165412}, 'doi_Deva-kan_Knda': {'average_sentence1_length': 57.38, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 170625}, 'doi_Deva-kas_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 170129}, 'doi_Deva-mai_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 167856}, 'doi_Deva-mal_Mlym': {'average_sentence1_length': 57.38, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 178286}, 'doi_Deva-mar_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 168195}, 'doi_Deva-mni_Mtei': {'average_sentence1_length': 57.38, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 162764}, 'doi_Deva-npi_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 166354}, 'doi_Deva-ory_Orya': {'average_sentence1_length': 57.38, 
'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 169671}, 'doi_Deva-pan_Guru': {'average_sentence1_length': 57.38, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 165649}, 'doi_Deva-san_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 163541}, 'doi_Deva-sat_Olck': {'average_sentence1_length': 57.38, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 174827}, 'doi_Deva-snd_Deva': {'average_sentence1_length': 57.38, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 168071}, 'doi_Deva-tam_Taml': {'average_sentence1_length': 57.38, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 180314}, 'doi_Deva-tel_Telu': {'average_sentence1_length': 57.38, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 163138}, 'doi_Deva-urd_Arab': {'average_sentence1_length': 57.38, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 166753}, 'eng_Latn-asm_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160716}, 'eng_Latn-ben_Beng': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155120}, 'eng_Latn-brx_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161176}, 'eng_Latn-doi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166164}, 'eng_Latn-gom_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155414}, 'eng_Latn-guj_Gujr': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157401}, 
'eng_Latn-hin_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159096}, 'eng_Latn-kan_Knda': {'average_sentence1_length': 53.18, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164309}, 'eng_Latn-kas_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163813}, 'eng_Latn-mai_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161540}, 'eng_Latn-mal_Mlym': {'average_sentence1_length': 53.18, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171970}, 'eng_Latn-mar_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161879}, 'eng_Latn-mni_Mtei': {'average_sentence1_length': 53.18, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156448}, 'eng_Latn-npi_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160038}, 'eng_Latn-ory_Orya': {'average_sentence1_length': 53.18, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163355}, 'eng_Latn-pan_Guru': {'average_sentence1_length': 53.18, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159333}, 'eng_Latn-san_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157225}, 'eng_Latn-sat_Olck': {'average_sentence1_length': 53.18, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168511}, 'eng_Latn-snd_Deva': {'average_sentence1_length': 53.18, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161755}, 'eng_Latn-tam_Taml': {'average_sentence1_length': 53.18, 'average_sentence2_length': 62.59, 
'num_samples': 1503, 'number_of_characters': 173998}, 'eng_Latn-tel_Telu': {'average_sentence1_length': 53.18, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156822}, 'eng_Latn-urd_Arab': {'average_sentence1_length': 53.18, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160437}, 'gom_Deva-asm_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 156282}, 'gom_Deva-ben_Beng': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 150686}, 'gom_Deva-brx_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 156742}, 'gom_Deva-doi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 161730}, 'gom_Deva-eng_Latn': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 155414}, 'gom_Deva-guj_Gujr': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 152967}, 'gom_Deva-hin_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 154662}, 'gom_Deva-kan_Knda': {'average_sentence1_length': 50.23, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 159875}, 'gom_Deva-kas_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 159379}, 'gom_Deva-mai_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 157106}, 'gom_Deva-mal_Mlym': {'average_sentence1_length': 50.23, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 167536}, 'gom_Deva-mar_Deva': 
{'average_sentence1_length': 50.23, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 157445}, 'gom_Deva-mni_Mtei': {'average_sentence1_length': 50.23, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 152014}, 'gom_Deva-npi_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 155604}, 'gom_Deva-ory_Orya': {'average_sentence1_length': 50.23, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 158921}, 'gom_Deva-pan_Guru': {'average_sentence1_length': 50.23, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 154899}, 'gom_Deva-san_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 152791}, 'gom_Deva-sat_Olck': {'average_sentence1_length': 50.23, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 164077}, 'gom_Deva-snd_Deva': {'average_sentence1_length': 50.23, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 157321}, 'gom_Deva-tam_Taml': {'average_sentence1_length': 50.23, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 169564}, 'gom_Deva-tel_Telu': {'average_sentence1_length': 50.23, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 152388}, 'gom_Deva-urd_Arab': {'average_sentence1_length': 50.23, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 156003}, 'guj_Gujr-asm_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158269}, 'guj_Gujr-ben_Beng': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152673}, 'guj_Gujr-brx_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.06, 'num_samples': 1503, 
'number_of_characters': 158729}, 'guj_Gujr-doi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163717}, 'guj_Gujr-eng_Latn': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157401}, 'guj_Gujr-gom_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152967}, 'guj_Gujr-hin_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156649}, 'guj_Gujr-kan_Knda': {'average_sentence1_length': 51.55, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161862}, 'guj_Gujr-kas_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161366}, 'guj_Gujr-mai_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 159093}, 'guj_Gujr-mal_Mlym': {'average_sentence1_length': 51.55, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169523}, 'guj_Gujr-mar_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159432}, 'guj_Gujr-mni_Mtei': {'average_sentence1_length': 51.55, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 154001}, 'guj_Gujr-npi_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157591}, 'guj_Gujr-ory_Orya': {'average_sentence1_length': 51.55, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160908}, 'guj_Gujr-pan_Guru': {'average_sentence1_length': 51.55, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156886}, 'guj_Gujr-san_Deva': {'average_sentence1_length': 51.55, 
'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154778}, 'guj_Gujr-sat_Olck': {'average_sentence1_length': 51.55, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 166064}, 'guj_Gujr-snd_Deva': {'average_sentence1_length': 51.55, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159308}, 'guj_Gujr-tam_Taml': {'average_sentence1_length': 51.55, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171551}, 'guj_Gujr-tel_Telu': {'average_sentence1_length': 51.55, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154375}, 'guj_Gujr-urd_Arab': {'average_sentence1_length': 51.55, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157990}, 'hin_Deva-asm_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 159964}, 'hin_Deva-ben_Beng': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154368}, 'hin_Deva-brx_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160424}, 'hin_Deva-doi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 165412}, 'hin_Deva-eng_Latn': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159096}, 'hin_Deva-gom_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154662}, 'hin_Deva-guj_Gujr': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156649}, 'hin_Deva-kan_Knda': {'average_sentence1_length': 52.68, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163557}, 
'hin_Deva-kas_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163061}, 'hin_Deva-mai_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 160788}, 'hin_Deva-mal_Mlym': {'average_sentence1_length': 52.68, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171218}, 'hin_Deva-mar_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161127}, 'hin_Deva-mni_Mtei': {'average_sentence1_length': 52.68, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155696}, 'hin_Deva-npi_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159286}, 'hin_Deva-ory_Orya': {'average_sentence1_length': 52.68, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162603}, 'hin_Deva-pan_Guru': {'average_sentence1_length': 52.68, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 158581}, 'hin_Deva-san_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156473}, 'hin_Deva-sat_Olck': {'average_sentence1_length': 52.68, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167759}, 'hin_Deva-snd_Deva': {'average_sentence1_length': 52.68, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161003}, 'hin_Deva-tam_Taml': {'average_sentence1_length': 52.68, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173246}, 'hin_Deva-tel_Telu': {'average_sentence1_length': 52.68, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156070}, 'hin_Deva-urd_Arab': {'average_sentence1_length': 52.68, 'average_sentence2_length': 53.57, 
'num_samples': 1503, 'number_of_characters': 159685}, 'kan_Knda-asm_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 165177}, 'kan_Knda-ben_Beng': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159581}, 'kan_Knda-brx_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165637}, 'kan_Knda-doi_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170625}, 'kan_Knda-eng_Latn': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 164309}, 'kan_Knda-gom_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159875}, 'kan_Knda-guj_Gujr': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161862}, 'kan_Knda-hin_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163557}, 'kan_Knda-kas_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 168274}, 'kan_Knda-mai_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 166001}, 'kan_Knda-mal_Mlym': {'average_sentence1_length': 56.14, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 176431}, 'kan_Knda-mar_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 166340}, 'kan_Knda-mni_Mtei': {'average_sentence1_length': 56.14, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160909}, 'kan_Knda-npi_Deva': 
{'average_sentence1_length': 56.14, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164499}, 'kan_Knda-ory_Orya': {'average_sentence1_length': 56.14, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167816}, 'kan_Knda-pan_Guru': {'average_sentence1_length': 56.14, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163794}, 'kan_Knda-san_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161686}, 'kan_Knda-sat_Olck': {'average_sentence1_length': 56.14, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172972}, 'kan_Knda-snd_Deva': {'average_sentence1_length': 56.14, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 166216}, 'kan_Knda-tam_Taml': {'average_sentence1_length': 56.14, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 178459}, 'kan_Knda-tel_Telu': {'average_sentence1_length': 56.14, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 161283}, 'kan_Knda-urd_Arab': {'average_sentence1_length': 56.14, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164898}, 'kas_Arab-asm_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 164681}, 'kas_Arab-ben_Beng': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 159085}, 'kas_Arab-brx_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 165141}, 'kas_Arab-doi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 170129}, 'kas_Arab-eng_Latn': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.18, 'num_samples': 1503, 
'number_of_characters': 163813}, 'kas_Arab-gom_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 159379}, 'kas_Arab-guj_Gujr': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 161366}, 'kas_Arab-hin_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 163061}, 'kas_Arab-kan_Knda': {'average_sentence1_length': 55.81, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 168274}, 'kas_Arab-mai_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165505}, 'kas_Arab-mal_Mlym': {'average_sentence1_length': 55.81, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175935}, 'kas_Arab-mar_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165844}, 'kas_Arab-mni_Mtei': {'average_sentence1_length': 55.81, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 160413}, 'kas_Arab-npi_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 164003}, 'kas_Arab-ory_Orya': {'average_sentence1_length': 55.81, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 167320}, 'kas_Arab-pan_Guru': {'average_sentence1_length': 55.81, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 163298}, 'kas_Arab-san_Deva': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 161190}, 'kas_Arab-sat_Olck': {'average_sentence1_length': 55.81, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172476}, 'kas_Arab-snd_Deva': {'average_sentence1_length': 55.81, 
'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 165720}, 'kas_Arab-tam_Taml': {'average_sentence1_length': 55.81, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177963}, 'kas_Arab-tel_Telu': {'average_sentence1_length': 55.81, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160787}, 'kas_Arab-urd_Arab': {'average_sentence1_length': 55.81, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 164402}, 'mai_Deva-asm_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162408}, 'mai_Deva-ben_Beng': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 156812}, 'mai_Deva-brx_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 162868}, 'mai_Deva-doi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 167856}, 'mai_Deva-eng_Latn': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161540}, 'mai_Deva-gom_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157106}, 'mai_Deva-guj_Gujr': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159093}, 'mai_Deva-hin_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 160788}, 'mai_Deva-kan_Knda': {'average_sentence1_length': 54.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166001}, 'mai_Deva-kas_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165505}, 'mai_Deva-mal_Mlym': 
{'average_sentence1_length': 54.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173662}, 'mai_Deva-mar_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163571}, 'mai_Deva-mni_Mtei': {'average_sentence1_length': 54.3, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158140}, 'mai_Deva-npi_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'mai_Deva-ory_Orya': {'average_sentence1_length': 54.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165047}, 'mai_Deva-pan_Guru': {'average_sentence1_length': 54.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161025}, 'mai_Deva-san_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 158917}, 'mai_Deva-sat_Olck': {'average_sentence1_length': 54.3, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170203}, 'mai_Deva-snd_Deva': {'average_sentence1_length': 54.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163447}, 'mai_Deva-tam_Taml': {'average_sentence1_length': 54.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175690}, 'mai_Deva-tel_Telu': {'average_sentence1_length': 54.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158514}, 'mai_Deva-urd_Arab': {'average_sentence1_length': 54.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162129}, 'mal_Mlym-asm_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 172838}, 'mal_Mlym-ben_Beng': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.03, 'num_samples': 1503, 
'number_of_characters': 167242}, 'mal_Mlym-brx_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 173298}, 'mal_Mlym-doi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 178286}, 'mal_Mlym-eng_Latn': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 171970}, 'mal_Mlym-gom_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 167536}, 'mal_Mlym-guj_Gujr': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 169523}, 'mal_Mlym-hin_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 171218}, 'mal_Mlym-kan_Knda': {'average_sentence1_length': 61.24, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 176431}, 'mal_Mlym-kas_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 175935}, 'mal_Mlym-mai_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 173662}, 'mal_Mlym-mar_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 174001}, 'mal_Mlym-mni_Mtei': {'average_sentence1_length': 61.24, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 168570}, 'mal_Mlym-npi_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 172160}, 'mal_Mlym-ory_Orya': {'average_sentence1_length': 61.24, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 175477}, 'mal_Mlym-pan_Guru': {'average_sentence1_length': 61.24, 
'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 171455}, 'mal_Mlym-san_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 169347}, 'mal_Mlym-sat_Olck': {'average_sentence1_length': 61.24, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 180633}, 'mal_Mlym-snd_Deva': {'average_sentence1_length': 61.24, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 173877}, 'mal_Mlym-tam_Taml': {'average_sentence1_length': 61.24, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 186120}, 'mal_Mlym-tel_Telu': {'average_sentence1_length': 61.24, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 168944}, 'mal_Mlym-urd_Arab': {'average_sentence1_length': 61.24, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 172559}, 'mar_Deva-asm_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162747}, 'mar_Deva-ben_Beng': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157151}, 'mar_Deva-brx_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 163207}, 'mar_Deva-doi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168195}, 'mar_Deva-eng_Latn': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161879}, 'mar_Deva-gom_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157445}, 'mar_Deva-guj_Gujr': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159432}, 
'mar_Deva-hin_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161127}, 'mar_Deva-kan_Knda': {'average_sentence1_length': 54.53, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166340}, 'mar_Deva-kas_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165844}, 'mar_Deva-mai_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163571}, 'mar_Deva-mal_Mlym': {'average_sentence1_length': 54.53, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 174001}, 'mar_Deva-mni_Mtei': {'average_sentence1_length': 54.53, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158479}, 'mar_Deva-npi_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 162069}, 'mar_Deva-ory_Orya': {'average_sentence1_length': 54.53, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165386}, 'mar_Deva-pan_Guru': {'average_sentence1_length': 54.53, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161364}, 'mar_Deva-san_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 159256}, 'mar_Deva-sat_Olck': {'average_sentence1_length': 54.53, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170542}, 'mar_Deva-snd_Deva': {'average_sentence1_length': 54.53, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 163786}, 'mar_Deva-tam_Taml': {'average_sentence1_length': 54.53, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 176029}, 'mar_Deva-tel_Telu': {'average_sentence1_length': 54.53, 'average_sentence2_length': 51.16, 
'num_samples': 1503, 'number_of_characters': 158853}, 'mar_Deva-urd_Arab': {'average_sentence1_length': 54.53, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162468}, 'mni_Mtei-asm_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157316}, 'mni_Mtei-ben_Beng': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 151720}, 'mni_Mtei-brx_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 157776}, 'mni_Mtei-doi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 162764}, 'mni_Mtei-eng_Latn': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156448}, 'mni_Mtei-gom_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152014}, 'mni_Mtei-guj_Gujr': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154001}, 'mni_Mtei-hin_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 155696}, 'mni_Mtei-kan_Knda': {'average_sentence1_length': 50.91, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 160909}, 'mni_Mtei-kas_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160413}, 'mni_Mtei-mai_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158140}, 'mni_Mtei-mal_Mlym': {'average_sentence1_length': 50.91, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168570}, 'mni_Mtei-mar_Deva': 
{'average_sentence1_length': 50.91, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158479}, 'mni_Mtei-npi_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 156638}, 'mni_Mtei-ory_Orya': {'average_sentence1_length': 50.91, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 159955}, 'mni_Mtei-pan_Guru': {'average_sentence1_length': 50.91, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 155933}, 'mni_Mtei-san_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 153825}, 'mni_Mtei-sat_Olck': {'average_sentence1_length': 50.91, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165111}, 'mni_Mtei-snd_Deva': {'average_sentence1_length': 50.91, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158355}, 'mni_Mtei-tam_Taml': {'average_sentence1_length': 50.91, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170598}, 'mni_Mtei-tel_Telu': {'average_sentence1_length': 50.91, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 153422}, 'mni_Mtei-urd_Arab': {'average_sentence1_length': 50.91, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157037}, 'npi_Deva-asm_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 160906}, 'npi_Deva-ben_Beng': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155310}, 'npi_Deva-brx_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161366}, 'npi_Deva-doi_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 57.38, 'num_samples': 1503, 
'number_of_characters': 166354}, 'npi_Deva-eng_Latn': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 160038}, 'npi_Deva-gom_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 155604}, 'npi_Deva-guj_Gujr': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157591}, 'npi_Deva-hin_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159286}, 'npi_Deva-kan_Knda': {'average_sentence1_length': 53.3, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164499}, 'npi_Deva-kas_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164003}, 'npi_Deva-mai_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161730}, 'npi_Deva-mal_Mlym': {'average_sentence1_length': 53.3, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172160}, 'npi_Deva-mar_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162069}, 'npi_Deva-mni_Mtei': {'average_sentence1_length': 53.3, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 156638}, 'npi_Deva-ory_Orya': {'average_sentence1_length': 53.3, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163545}, 'npi_Deva-pan_Guru': {'average_sentence1_length': 53.3, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159523}, 'npi_Deva-san_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157415}, 'npi_Deva-sat_Olck': {'average_sentence1_length': 53.3, 
'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 168701}, 'npi_Deva-snd_Deva': {'average_sentence1_length': 53.3, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161945}, 'npi_Deva-tam_Taml': {'average_sentence1_length': 53.3, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174188}, 'npi_Deva-tel_Telu': {'average_sentence1_length': 53.3, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157012}, 'npi_Deva-urd_Arab': {'average_sentence1_length': 53.3, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 160627}, 'ory_Orya-asm_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 164223}, 'ory_Orya-ben_Beng': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 158627}, 'ory_Orya-brx_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 164683}, 'ory_Orya-doi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 169671}, 'ory_Orya-eng_Latn': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 163355}, 'ory_Orya-gom_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 158921}, 'ory_Orya-guj_Gujr': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 160908}, 'ory_Orya-hin_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 162603}, 'ory_Orya-kan_Knda': {'average_sentence1_length': 55.51, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 167816}, 
'ory_Orya-kas_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 167320}, 'ory_Orya-mai_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 165047}, 'ory_Orya-mal_Mlym': {'average_sentence1_length': 55.51, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 175477}, 'ory_Orya-mar_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 165386}, 'ory_Orya-mni_Mtei': {'average_sentence1_length': 55.51, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 159955}, 'ory_Orya-npi_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 163545}, 'ory_Orya-pan_Guru': {'average_sentence1_length': 55.51, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 162840}, 'ory_Orya-san_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 160732}, 'ory_Orya-sat_Olck': {'average_sentence1_length': 55.51, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 172018}, 'ory_Orya-snd_Deva': {'average_sentence1_length': 55.51, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 165262}, 'ory_Orya-tam_Taml': {'average_sentence1_length': 55.51, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 177505}, 'ory_Orya-tel_Telu': {'average_sentence1_length': 55.51, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 160329}, 'ory_Orya-urd_Arab': {'average_sentence1_length': 55.51, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 163944}, 'pan_Guru-asm_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.75, 
'num_samples': 1503, 'number_of_characters': 160201}, 'pan_Guru-ben_Beng': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 154605}, 'pan_Guru-brx_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 160661}, 'pan_Guru-doi_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 165649}, 'pan_Guru-eng_Latn': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 159333}, 'pan_Guru-gom_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 154899}, 'pan_Guru-guj_Gujr': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 156886}, 'pan_Guru-hin_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 158581}, 'pan_Guru-kan_Knda': {'average_sentence1_length': 52.83, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 163794}, 'pan_Guru-kas_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 163298}, 'pan_Guru-mai_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 161025}, 'pan_Guru-mal_Mlym': {'average_sentence1_length': 52.83, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 171455}, 'pan_Guru-mar_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 161364}, 'pan_Guru-mni_Mtei': {'average_sentence1_length': 52.83, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 155933}, 'pan_Guru-npi_Deva': 
{'average_sentence1_length': 52.83, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 159523}, 'pan_Guru-ory_Orya': {'average_sentence1_length': 52.83, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 162840}, 'pan_Guru-san_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 156710}, 'pan_Guru-sat_Olck': {'average_sentence1_length': 52.83, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 167996}, 'pan_Guru-snd_Deva': {'average_sentence1_length': 52.83, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 161240}, 'pan_Guru-tam_Taml': {'average_sentence1_length': 52.83, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 173483}, 'pan_Guru-tel_Telu': {'average_sentence1_length': 52.83, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 156307}, 'pan_Guru-urd_Arab': {'average_sentence1_length': 52.83, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 159922}, 'san_Deva-asm_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 158093}, 'san_Deva-ben_Beng': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152497}, 'san_Deva-brx_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158553}, 'san_Deva-doi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163541}, 'san_Deva-eng_Latn': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 157225}, 'san_Deva-gom_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.23, 'num_samples': 1503, 
'number_of_characters': 152791}, 'san_Deva-guj_Gujr': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154778}, 'san_Deva-hin_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156473}, 'san_Deva-kan_Knda': {'average_sentence1_length': 51.43, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161686}, 'san_Deva-kas_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 161190}, 'san_Deva-mai_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158917}, 'san_Deva-mal_Mlym': {'average_sentence1_length': 51.43, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 169347}, 'san_Deva-mar_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 159256}, 'san_Deva-mni_Mtei': {'average_sentence1_length': 51.43, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153825}, 'san_Deva-npi_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157415}, 'san_Deva-ory_Orya': {'average_sentence1_length': 51.43, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160732}, 'san_Deva-pan_Guru': {'average_sentence1_length': 51.43, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156710}, 'san_Deva-sat_Olck': {'average_sentence1_length': 51.43, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165888}, 'san_Deva-snd_Deva': {'average_sentence1_length': 51.43, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 159132}, 'san_Deva-tam_Taml': {'average_sentence1_length': 51.43, 
'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 171375}, 'san_Deva-tel_Telu': {'average_sentence1_length': 51.43, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 154199}, 'san_Deva-urd_Arab': {'average_sentence1_length': 51.43, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157814}, 'sat_Olck-asm_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 169379}, 'sat_Olck-ben_Beng': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 163783}, 'sat_Olck-brx_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 169839}, 'sat_Olck-doi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 174827}, 'sat_Olck-eng_Latn': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 168511}, 'sat_Olck-gom_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 164077}, 'sat_Olck-guj_Gujr': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 166064}, 'sat_Olck-hin_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 167759}, 'sat_Olck-kan_Knda': {'average_sentence1_length': 58.94, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 172972}, 'sat_Olck-kas_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 172476}, 'sat_Olck-mai_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 170203}, 
'sat_Olck-mal_Mlym': {'average_sentence1_length': 58.94, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 180633}, 'sat_Olck-mar_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 170542}, 'sat_Olck-mni_Mtei': {'average_sentence1_length': 58.94, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 165111}, 'sat_Olck-npi_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 168701}, 'sat_Olck-ory_Orya': {'average_sentence1_length': 58.94, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 172018}, 'sat_Olck-pan_Guru': {'average_sentence1_length': 58.94, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 167996}, 'sat_Olck-san_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 165888}, 'sat_Olck-snd_Deva': {'average_sentence1_length': 58.94, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 170418}, 'sat_Olck-tam_Taml': {'average_sentence1_length': 58.94, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 182661}, 'sat_Olck-tel_Telu': {'average_sentence1_length': 58.94, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 165485}, 'sat_Olck-urd_Arab': {'average_sentence1_length': 58.94, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 169100}, 'snd_Deva-asm_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 162623}, 'snd_Deva-ben_Beng': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 157027}, 'snd_Deva-brx_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.06, 
'num_samples': 1503, 'number_of_characters': 163083}, 'snd_Deva-doi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 168071}, 'snd_Deva-eng_Latn': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 161755}, 'snd_Deva-gom_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 157321}, 'snd_Deva-guj_Gujr': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 159308}, 'snd_Deva-hin_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 161003}, 'snd_Deva-kan_Knda': {'average_sentence1_length': 54.45, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 166216}, 'snd_Deva-kas_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 165720}, 'snd_Deva-mai_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 163447}, 'snd_Deva-mal_Mlym': {'average_sentence1_length': 54.45, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 173877}, 'snd_Deva-mar_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 163786}, 'snd_Deva-mni_Mtei': {'average_sentence1_length': 54.45, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 158355}, 'snd_Deva-npi_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 161945}, 'snd_Deva-ory_Orya': {'average_sentence1_length': 54.45, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 165262}, 'snd_Deva-pan_Guru': {'average_sentence1_length': 
54.45, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 161240}, 'snd_Deva-san_Deva': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 159132}, 'snd_Deva-sat_Olck': {'average_sentence1_length': 54.45, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 170418}, 'snd_Deva-tam_Taml': {'average_sentence1_length': 54.45, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 175905}, 'snd_Deva-tel_Telu': {'average_sentence1_length': 54.45, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 158729}, 'snd_Deva-urd_Arab': {'average_sentence1_length': 54.45, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 162344}, 'tam_Taml-asm_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 174866}, 'tam_Taml-ben_Beng': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 169270}, 'tam_Taml-brx_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 175326}, 'tam_Taml-doi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 180314}, 'tam_Taml-eng_Latn': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 173998}, 'tam_Taml-gom_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 169564}, 'tam_Taml-guj_Gujr': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 171551}, 'tam_Taml-hin_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 173246}, 
'tam_Taml-kan_Knda': {'average_sentence1_length': 62.59, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 178459}, 'tam_Taml-kas_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 177963}, 'tam_Taml-mai_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 175690}, 'tam_Taml-mal_Mlym': {'average_sentence1_length': 62.59, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 186120}, 'tam_Taml-mar_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 176029}, 'tam_Taml-mni_Mtei': {'average_sentence1_length': 62.59, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 170598}, 'tam_Taml-npi_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 174188}, 'tam_Taml-ory_Orya': {'average_sentence1_length': 62.59, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 177505}, 'tam_Taml-pan_Guru': {'average_sentence1_length': 62.59, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 173483}, 'tam_Taml-san_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 171375}, 'tam_Taml-sat_Olck': {'average_sentence1_length': 62.59, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 182661}, 'tam_Taml-snd_Deva': {'average_sentence1_length': 62.59, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 175905}, 'tam_Taml-tel_Telu': {'average_sentence1_length': 62.59, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 170972}, 'tam_Taml-urd_Arab': {'average_sentence1_length': 62.59, 'average_sentence2_length': 53.57, 
'num_samples': 1503, 'number_of_characters': 174587}, 'tel_Telu-asm_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 157690}, 'tel_Telu-ben_Beng': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 152094}, 'tel_Telu-brx_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 158150}, 'tel_Telu-doi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 163138}, 'tel_Telu-eng_Latn': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.18, 'num_samples': 1503, 'number_of_characters': 156822}, 'tel_Telu-gom_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 152388}, 'tel_Telu-guj_Gujr': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 154375}, 'tel_Telu-hin_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 156070}, 'tel_Telu-kan_Knda': {'average_sentence1_length': 51.16, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 161283}, 'tel_Telu-kas_Arab': {'average_sentence1_length': 51.16, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 160787}, 'tel_Telu-mai_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 158514}, 'tel_Telu-mal_Mlym': {'average_sentence1_length': 51.16, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 168944}, 'tel_Telu-mar_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 158853}, 'tel_Telu-mni_Mtei': 
{'average_sentence1_length': 51.16, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 153422}, 'tel_Telu-npi_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 157012}, 'tel_Telu-ory_Orya': {'average_sentence1_length': 51.16, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 160329}, 'tel_Telu-pan_Guru': {'average_sentence1_length': 51.16, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 156307}, 'tel_Telu-san_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 154199}, 'tel_Telu-sat_Olck': {'average_sentence1_length': 51.16, 'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 165485}, 'tel_Telu-snd_Deva': {'average_sentence1_length': 51.16, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 158729}, 'tel_Telu-tam_Taml': {'average_sentence1_length': 51.16, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 170972}, 'tel_Telu-urd_Arab': {'average_sentence1_length': 51.16, 'average_sentence2_length': 53.57, 'num_samples': 1503, 'number_of_characters': 157411}, 'urd_Arab-asm_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.75, 'num_samples': 1503, 'number_of_characters': 161305}, 'urd_Arab-ben_Beng': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.03, 'num_samples': 1503, 'number_of_characters': 155709}, 'urd_Arab-brx_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.06, 'num_samples': 1503, 'number_of_characters': 161765}, 'urd_Arab-doi_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 57.38, 'num_samples': 1503, 'number_of_characters': 166753}, 'urd_Arab-eng_Latn': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.18, 'num_samples': 1503, 
'number_of_characters': 160437}, 'urd_Arab-gom_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.23, 'num_samples': 1503, 'number_of_characters': 156003}, 'urd_Arab-guj_Gujr': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.55, 'num_samples': 1503, 'number_of_characters': 157990}, 'urd_Arab-hin_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.68, 'num_samples': 1503, 'number_of_characters': 159685}, 'urd_Arab-kan_Knda': {'average_sentence1_length': 53.57, 'average_sentence2_length': 56.14, 'num_samples': 1503, 'number_of_characters': 164898}, 'urd_Arab-kas_Arab': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.81, 'num_samples': 1503, 'number_of_characters': 164402}, 'urd_Arab-mai_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.3, 'num_samples': 1503, 'number_of_characters': 162129}, 'urd_Arab-mal_Mlym': {'average_sentence1_length': 53.57, 'average_sentence2_length': 61.24, 'num_samples': 1503, 'number_of_characters': 172559}, 'urd_Arab-mar_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.53, 'num_samples': 1503, 'number_of_characters': 162468}, 'urd_Arab-mni_Mtei': {'average_sentence1_length': 53.57, 'average_sentence2_length': 50.91, 'num_samples': 1503, 'number_of_characters': 157037}, 'urd_Arab-npi_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 53.3, 'num_samples': 1503, 'number_of_characters': 160627}, 'urd_Arab-ory_Orya': {'average_sentence1_length': 53.57, 'average_sentence2_length': 55.51, 'num_samples': 1503, 'number_of_characters': 163944}, 'urd_Arab-pan_Guru': {'average_sentence1_length': 53.57, 'average_sentence2_length': 52.83, 'num_samples': 1503, 'number_of_characters': 159922}, 'urd_Arab-san_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.43, 'num_samples': 1503, 'number_of_characters': 157814}, 'urd_Arab-sat_Olck': {'average_sentence1_length': 53.57, 
'average_sentence2_length': 58.94, 'num_samples': 1503, 'number_of_characters': 169100}, 'urd_Arab-snd_Deva': {'average_sentence1_length': 53.57, 'average_sentence2_length': 54.45, 'num_samples': 1503, 'number_of_characters': 162344}, 'urd_Arab-tam_Taml': {'average_sentence1_length': 53.57, 'average_sentence2_length': 62.59, 'num_samples': 1503, 'number_of_characters': 174587}, 'urd_Arab-tel_Telu': {'average_sentence1_length': 53.57, 'average_sentence2_length': 51.16, 'num_samples': 1503, 'number_of_characters': 157411}}}} |
IN22GenBitextMining (Jay Gala, 2023) |
['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Web, Legal, Government, News, Religious, Non-fiction, Written] |
None |
None |
IWSLT2017BitextMining |
['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] |
BitextMining |
s2s |
[Non-fiction, Fiction, Written] |
None |
None |
ImdbClassification |
['eng'] |
Classification |
p2p |
[Reviews, Written] |
None |
None |
InappropriatenessClassification |
['rus'] |
Classification |
s2s |
[Web, Social, Written] |
None |
None |
IndicCrosslingualSTS (Ramesh et al., 2022) |
['asm', 'ben', 'eng', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
STS |
s2s |
[News, Non-fiction, Web, Spoken, Government, Written] |
None |
None |
IndicGenBenchFloresBitextMining (Harman Singh, 2024) |
['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Web, News, Written] |
None |
None |
IndicLangClassification |
['asm', 'ben', 'brx', 'doi', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
Classification |
s2s |
[Web, Non-fiction, Written] |
None |
None |
IndicNLPNewsClassification (Anoop Kunchukuttan, 2020) |
['guj', 'kan', 'mal', 'mar', 'ori', 'pan', 'tam', 'tel'] |
Classification |
s2s |
[News, Written] |
None |
None |
IndicQARetrieval (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
IndicReviewsClusteringP2P (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
Clustering |
p2p |
[Reviews, Written] |
None |
None |
IndicSentimentClassification (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
IndonesianIdClickbaitClassification |
['ind'] |
Classification |
s2s |
[News, Written] |
None |
None |
IndonesianMongabayConservationClassification |
['ind'] |
Classification |
s2s |
[Web, Written] |
None |
None |
InsurancePolicyInterpretationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
InternationalCitizenshipQuestionsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
IsiZuluNewsClassification (Madodonga et al., 2023) |
['zul'] |
Classification |
s2s |
[News, Written] |
None |
None |
ItaCaseholdClassification (Licari et al., 2023) |
['ita'] |
Classification |
s2s |
[Legal, Government, Written] |
None |
None |
Itacola |
['ita'] |
Classification |
s2s |
[Non-fiction, Spoken, Written] |
None |
None |
JCrewBlockerLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
JDReview (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
JSICK (Yanaka et al., 2022) |
['jpn'] |
STS |
s2s |
[Web, Written] |
None |
None |
JSTS |
['jpn'] |
STS |
s2s |
[Web, Written] |
None |
None |
JaGovFaqsRetrieval |
['jpn'] |
Retrieval |
s2s |
[Web, Written] |
None |
None |
JaQuADRetrieval (ByungHoon So, 2022) |
['jpn'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
JaqketRetrieval |
['jpn'] |
Retrieval |
s2p |
[Encyclopaedic, Non-fiction, Written] |
{'test': 115226} |
{'test': {'number_of_characters': 3799.7, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'average_document_length': 0.03, 'average_query_length': 0.05, 'average_relevant_docs_per_query': 1.0}} |
JavaneseIMDBClassification (Wongso et al., 2021) |
['jav'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
KLUE-NLI (Sungjoon Park, 2021) |
['kor'] |
PairClassification |
s2s |
[News, Encyclopaedic, Written] |
None |
None |
KLUE-STS (Sungjoon Park, 2021) |
['kor'] |
STS |
s2s |
[Reviews, News, Spoken, Written] |
None |
None |
KLUE-TC (Sungjoon Park, 2021) |
['kor'] |
Classification |
s2s |
[News, Written] |
None |
None |
KannadaNewsClassification (Anoop Kunchukuttan, 2020) |
['kan'] |
Classification |
s2s |
[News, Written] |
None |
None |
KinopoiskClassification (Blinov et al., 2013) |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
None |
None |
Ko-StrategyQA (Geva et al., 2021) |
['kor'] |
Retrieval |
s2p |
|
None |
None |
KorFin (Son et al., 2023) |
['kor'] |
Classification |
s2s |
[News, Written] |
None |
None |
KorHateClassification (Jihyung Moon, 2020) |
['kor'] |
Classification |
s2s |
[Social, Written] |
None |
None |
KorHateSpeechMLClassification |
['kor'] |
MultilabelClassification |
s2s |
[Social, Written] |
None |
None |
KorSTS (Ham et al., 2020) |
['kor'] |
STS |
s2s |
[News, Web] |
None |
None |
KorSarcasmClassification (Kim et al., 2019) |
['kor'] |
Classification |
s2s |
[Social, Written] |
None |
None |
KurdishSentimentClassification (Badawi et al., 2024) |
['kur'] |
Classification |
s2s |
[Web, Written] |
None |
None |
LCQMC (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
LEMBNarrativeQARetrieval |
['eng'] |
Retrieval |
s2p |
[Fiction, Non-fiction, Written] |
None |
None |
LEMBNeedleRetrieval (Zhu et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Academic, Blog, Written] |
None |
None |
LEMBPasskeyRetrieval (Zhu et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Fiction, Written] |
None |
None |
LEMBQMSumRetrieval |
['eng'] |
Retrieval |
s2p |
[Spoken, Written] |
None |
None |
LEMBSummScreenFDRetrieval |
['eng'] |
Retrieval |
s2p |
[Spoken, Written] |
None |
None |
LEMBWikimQARetrieval (Ho et al., 2020) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
LanguageClassification (Conneau et al., 2018) |
['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Reviews, Web, Non-fiction, Fiction, Government, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'number_of_characters': 224352, 'average_text_length': 109.55, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}} |
LccSentimentClassification |
['dan'] |
Classification |
s2s |
[News, Web, Written] |
None |
None |
LeCaRDv2 (Haitao Li, 2023) |
['zho'] |
Retrieval |
p2p |
[Legal, Written] |
None |
None |
LearnedHandsBenefitsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsBusinessLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsConsumerLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsCourtsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsCrimeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsDivorceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsDomesticViolenceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsEducationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsEmploymentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsEstatesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsFamilyLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsHealthLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsHousingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsImmigrationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsTortsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LearnedHandsTrafficLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LegalBenchConsumerContractsQA (Koreeda et al., 2021) |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
None |
LegalBenchCorporateLobbying (Neel Guha, 2023) |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
None |
LegalBenchPC (Neel Guha, 2023) |
['eng'] |
PairClassification |
s2s |
[Legal, Written] |
None |
None |
LegalQuAD (Hoppe et al., 2021) |
['deu'] |
Retrieval |
s2p |
[Legal, Written] |
None |
None |
LegalReasoningCausalityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
LegalSummarization |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
None |
LinceMTBitextMining (Aguilar et al., 2020) |
['eng', 'hin'] |
BitextMining |
s2s |
[Social, Written] |
None |
None |
LitSearchRetrieval (Ajith et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Academic, Non-fiction, Written] |
None |
None |
LivedoorNewsClustering.v2 |
['jpn'] |
Clustering |
s2s |
[News, Written] |
None |
None |
MAUDLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
MIRACLReranking (Zhang et al., 2023) |
['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] |
Reranking |
s2s |
[Encyclopaedic, Written] |
None |
None |
MIRACLRetrieval (Zhang et al., 2023) |
['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
MIRACLRetrievalHardNegatives (Zhang et al., 2023) |
['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
MLQARetrieval |
['ara', 'deu', 'eng', 'hin', 'spa', 'vie', 'zho'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
MLQuestions |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Academic, Written] |
None |
None |
MLSUMClusteringP2P.v2 (Scialom et al., 2020) |
['deu', 'fra', 'rus', 'spa'] |
Clustering |
p2p |
[News, Written] |
None |
None |
MLSUMClusteringS2S.v2 (Scialom et al., 2020) |
['deu', 'fra', 'rus', 'spa'] |
Clustering |
s2s |
[News, Written] |
None |
None |
MMarcoReranking (Luiz Henrique Bonifacio, 2021) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
MMarcoRetrieval (Shitao Xiao, 2024) |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
MSMARCO (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
MSMARCO-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
MSMARCO-PLHardNegatives (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
MSMARCOHardNegatives (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
MSMARCOv2 (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
MTOPDomainClassification |
['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] |
Classification |
s2s |
[Spoken] |
None |
None |
MTOPIntentClassification |
['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] |
Classification |
s2s |
[Spoken] |
None |
None |
MacedonianTweetSentimentClassification |
['mkd'] |
Classification |
s2s |
[Social, Written] |
None |
None |
MalayalamNewsClassification (Anoop Kunchukuttan, 2020) |
['mal'] |
Classification |
s2s |
[News, Written] |
None |
None |
MalteseNewsClassification |
['mlt'] |
MultilabelClassification |
s2s |
[Constructed, Written] |
None |
None |
MarathiNewsClassification (Anoop Kunchukuttan, 2020) |
['mar'] |
Classification |
s2s |
[News, Written] |
None |
None |
MasakhaNEWSClassification (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Classification |
s2s |
[News, Written] |
None |
None |
MasakhaNEWSClusteringP2P (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Clustering |
p2p |
[News, Written, Non-fiction] |
None |
None |
MasakhaNEWSClusteringS2S (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Clustering |
s2s |
|
None |
None |
MassiveIntentClassification (Jack FitzGerald, 2022) |
['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Spoken] |
None |
None |
MassiveScenarioClassification (Jack FitzGerald, 2022) |
['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Spoken] |
None |
None |
MedicalQARetrieval (Asma et al., 2019) |
['eng'] |
Retrieval |
s2s |
[Medical, Written] |
None |
None |
MedicalRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
MedrxivClusteringP2P.v2 |
['eng'] |
Clustering |
p2p |
[Academic, Medical, Written] |
None |
None |
MedrxivClusteringS2S.v2 |
['eng'] |
Clustering |
s2s |
[Academic, Medical, Written] |
None |
None |
MewsC16JaClustering |
['jpn'] |
Clustering |
s2s |
[News, Written] |
None |
None |
MindSmallReranking |
['eng'] |
Reranking |
s2s |
[News, Written] |
None |
None |
MintakaRetrieval |
['ara', 'deu', 'fra', 'hin', 'ita', 'jpn', 'por', 'spa'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
Moroco (Andrei M. Butnaru, 2019) |
['ron'] |
Classification |
s2s |
[News, Written] |
None |
None |
MovieReviewSentimentClassification (Théophile Blard, 2020) |
['fra'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
MrTidyRetrieval (Xinyu Zhang, 2021) |
['ara', 'ben', 'eng', 'fin', 'ind', 'jpn', 'kor', 'rus', 'swa', 'tel', 'tha'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
MultiEURLEXMultilabelClassification (Chalkidis et al., 2021) |
['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] |
MultilabelClassification |
p2p |
[Legal, Government, Written] |
{'test': 115000} |
{'test': {'average_text_length': 12014.41, 'number_of_characters': 1381657027, 'average_label_per_text': 3.59, 'num_samples': 115000, 'unique_labels': 21, 'labels': {'18': {'count': 50784}, '15': {'count': 30981}, '5': {'count': 24978}, '6': {'count': 45080}, '3': {'count': 63687}, '17': {'count': 37743}, '1': {'count': 15019}, '20': {'count': 14030}, '0': {'count': 17802}, '2': {'count': 22402}, '19': {'count': 10212}, '9': {'count': 3772}, '4': {'count': 9062}, '10': {'count': 7705}, '11': {'count': 12213}, '7': {'count': 14306}, '12': {'count': 11799}, '8': {'count': 13800}, '13': {'count': 2346}, '14': {'count': 4255}, '16': {'count': 1311}}, 'hf_subset_descriptive_stats': {'en': {'average_text_length': 11720.29, 'number_of_characters': 58601463, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'de': {'average_text_length': 12865.42, 'number_of_characters': 64327081, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fr': {'average_text_length': 13081.11, 'number_of_characters': 65405549, 
'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'it': {'average_text_length': 12763.48, 'number_of_characters': 63817393, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'es': {'average_text_length': 13080.29, 'number_of_characters': 65401450, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pl': {'average_text_length': 12282.59, 'number_of_characters': 61412963, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 
1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'ro': {'average_text_length': 12836.93, 'number_of_characters': 64184661, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'nl': {'average_text_length': 12857.97, 'number_of_characters': 64289871, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'el': {'average_text_length': 12998.14, 'number_of_characters': 64990715, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': 
{'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hu': {'average_text_length': 12424.64, 'number_of_characters': 62123205, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pt': {'average_text_length': 12482.46, 'number_of_characters': 62412308, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'cs': {'average_text_length': 10783.47, 'number_of_characters': 53917338, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, 
'12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sv': {'average_text_length': 11612.48, 'number_of_characters': 58062387, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'bg': {'average_text_length': 12235.43, 'number_of_characters': 61177134, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'da': {'average_text_length': 11773.96, 'number_of_characters': 58869790, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fi': {'average_text_length': 
12087.69, 'number_of_characters': 60438431, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sk': {'average_text_length': 11130.81, 'number_of_characters': 55654070, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lt': {'average_text_length': 11245.36, 'number_of_characters': 56226783, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hr': {'average_text_length': 11022.14, 'number_of_characters': 55110710, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 
2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sl': {'average_text_length': 10620.06, 'number_of_characters': 53100297, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'et': {'average_text_length': 10898.43, 'number_of_characters': 54492156, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lv': {'average_text_length': 10938.51, 'number_of_characters': 54692551, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': 
{'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'mt': {'average_text_length': 12589.74, 'number_of_characters': 62948721, 'average_label_per_text': 3.59, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}}}} |
MultiHateClassification |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'nld', 'pol', 'por', 'spa'] |
Classification |
s2s |
[Constructed, Written] |
None |
None |
MultiLongDocRetrieval (Jianlv Chen, 2024) |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'por', 'rus', 'spa', 'tha'] |
Retrieval |
s2p |
[Encyclopaedic, Written, Web, Non-fiction, Fiction] |
None |
None |
MultilingualSentiment |
['cmn'] |
Classification |
s2s |
|
None |
None |
MultilingualSentimentClassification |
['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
MyanmarNews (A. H. Khine, 2017) |
['mya'] |
Classification |
p2p |
[News, Written] |
None |
None |
NFCorpus (Boteva et al., 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
NFCorpus-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
NLPJournalAbsIntroRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
None |
None |
NLPJournalTitleAbsRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
None |
None |
NLPJournalTitleIntroRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
None |
None |
NQ (Tom Kwiatkowski, 2019) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
NQ-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
NQ-PLHardNegatives (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
NQHardNegatives (Tom Kwiatkowski, 2019) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
NTREXBitextMining |
['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] |
BitextMining |
s2s |
[News, Written] |
None |
None |
NYSJudicialEthicsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
NaijaSenti |
['hau', 'ibo', 'pcm', 'yor'] |
Classification |
s2s |
[Social, Written] |
None |
None |
NarrativeQARetrieval (Tomáš Kočiský, 2017) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
NepaliNewsClassification |
['nep'] |
Classification |
s2s |
[News, Written] |
None |
None |
NeuCLIR2022Retrieval (Lawrie et al., 2023) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
NeuCLIR2022RetrievalHardNegatives (Lawrie et al., 2023) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
NeuCLIR2023Retrieval (Dawn Lawrie, 2024) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
NeuCLIR2023RetrievalHardNegatives (Dawn Lawrie, 2024) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
News21InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
None |
None |
NewsClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[News, Written] |
None |
None |
NoRecClassification |
['nob'] |
Classification |
s2s |
[Written, Reviews] |
None |
None |
NollySentiBitextMining (Shode et al., 2023) |
['eng', 'hau', 'ibo', 'pcm', 'yor'] |
BitextMining |
s2s |
[Social, Reviews, Written] |
None |
None |
NorQuadRetrieval |
['nob'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
NordicLangClassification |
['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] |
Classification |
s2s |
[Encyclopaedic] |
None |
None |
NorwegianCourtsBitextMining (Tiedemann et al., 2020) |
['nno', 'nob'] |
BitextMining |
s2s |
[Legal, Written] |
None |
None |
NorwegianParliamentClassification |
['nob'] |
Classification |
s2s |
[Government, Spoken] |
None |
None |
NusaParagraphEmotionClassification |
['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
Classification |
s2s |
[Non-fiction, Fiction, Written] |
None |
None |
NusaParagraphTopicClassification |
['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
Classification |
s2s |
[Non-fiction, Fiction, Written] |
None |
None |
NusaTranslationBitextMining (Cahyawijaya et al., 2023) |
['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
BitextMining |
s2s |
[Social, Written] |
{'train': 50200} |
{'train': {'average_sentence1_length': 145.46, 'average_sentence2_length': 148.57, 'num_samples': 50200, 'number_of_characters': 14759870, 'hf_subset_descriptive_stats': {'ind-abs': {'average_sentence1_length': 148.37, 'average_sentence2_length': 147.31, 'num_samples': 1000, 'number_of_characters': 295680}, 'ind-btk': {'average_sentence1_length': 145.37, 'average_sentence2_length': 146.74, 'num_samples': 6600, 'number_of_characters': 1927907}, 'ind-bew': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.41, 'num_samples': 6600, 'number_of_characters': 1939300}, 'ind-bhp': {'average_sentence1_length': 133.53, 'average_sentence2_length': 128.14, 'num_samples': 1000, 'number_of_characters': 261666}, 'ind-jav': {'average_sentence1_length': 145.43, 'average_sentence2_length': 145.81, 'num_samples': 6600, 'number_of_characters': 1922162}, 'ind-mad': {'average_sentence1_length': 145.36, 'average_sentence2_length': 153.62, 'num_samples': 6600, 'number_of_characters': 1973257}, 'ind-mak': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.61, 'num_samples': 6600, 'number_of_characters': 1953868}, 'ind-min': {'average_sentence1_length': 145.43, 'average_sentence2_length': 148.06, 'num_samples': 6600, 'number_of_characters': 1937033}, 'ind-mui': {'average_sentence1_length': 150.45, 'average_sentence2_length': 150.99, 'num_samples': 1000, 'number_of_characters': 301448}, 'ind-rej': {'average_sentence1_length': 151.62, 'average_sentence2_length': 139.58, 'num_samples': 1000, 'number_of_characters': 291205}, 'ind-sun': {'average_sentence1_length': 145.43, 'average_sentence2_length': 150.99, 'num_samples': 6600, 'number_of_characters': 1956344}}}} |
NusaX-senti (Winata et al., 2022) |
['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] |
Classification |
s2s |
[Reviews, Web, Social, Constructed, Written] |
None |
None |
NusaXBitextMining (Winata et al., 2023) |
['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] |
BitextMining |
s2s |
[Reviews, Written] |
None |
None |
OPP115DataRetentionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115DataSecurityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115DoNotTrackLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115FirstPartyCollectionUseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115InternationalAndSpecificAudiencesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115PolicyChangeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115ThirdPartySharingCollectionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115UserAccessEditAndDeletionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OPP115UserChoiceControlLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
Ocnli (Hai Hu, 2020) |
['cmn'] |
PairClassification |
s2s |
|
None |
None |
OdiaNewsClassification (Anoop Kunchukuttan, 2020) |
['ory'] |
Classification |
s2s |
[News, Written] |
None |
None |
OnlineShopping (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
OnlineStoreReviewSentimentClassification |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
OpusparcusPC (Mathias Creutz, 2018) |
['deu', 'eng', 'fin', 'fra', 'rus', 'swe'] |
PairClassification |
s2s |
[Spoken] |
None |
None |
OralArgumentQuestionPurposeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
OverrulingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
PAC (Łukasz Augustyniak, 2022) |
['pol'] |
Classification |
p2p |
[Legal, Written] |
None |
None |
PAWSX (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
PIQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
PROALegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
PSC |
['pol'] |
PairClassification |
s2s |
[News, Written] |
None |
None |
PatentClassification |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
PawsXPairClassification (Yinfei Yang, 2019) |
['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] |
PairClassification |
s2s |
[Web, Encyclopaedic, Written] |
{'test': 14000, 'validation': 14000} |
{'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'avg_sentence1_len': 91.18, 'avg_sentence2_len': 91.1, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'avg_sentence1_len': 119.78, 'avg_sentence2_len': 119.24, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'avg_sentence1_len': 113.76, 'avg_sentence2_len': 113.42, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'avg_sentence1_len': 117.81, 'avg_sentence2_len': 117.8, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'avg_sentence1_len': 120.03, 'avg_sentence2_len': 119.99, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 235106, 'avg_sentence1_len': 58.68, 'avg_sentence2_len': 58.88, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'avg_sentence1_len': 64.96, 'avg_sentence2_len': 65.11, 'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'avg_sentence1_len': 43.23, 'avg_sentence2_len': 43.27, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'avg_sentence1_len': 90.13, 'avg_sentence2_len': 90.2, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'avg_sentence1_len': 116.82, 'avg_sentence2_len': 117.0, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 
1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'avg_sentence1_len': 113.11, 'avg_sentence2_len': 112.86, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'avg_sentence1_len': 116.33, 'avg_sentence2_len': 116.73, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'avg_sentence1_len': 119.5, 'avg_sentence2_len': 119.75, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'avg_sentence1_len': 57.51, 'avg_sentence2_len': 57.32, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'avg_sentence1_len': 65.16, 'avg_sentence2_len': 65.52, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'avg_sentence1_len': 42.45, 'avg_sentence2_len': 42.26, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': {'count': 1147}}}}}} |
PersianFoodSentimentClassification (Mehrdad Farahani et al., 2020) |
['fas'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
PersonalJurisdictionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
PhincBitextMining (Srivastava et al., 2020) |
['eng', 'hin'] |
BitextMining |
s2s |
[Social, Written] |
None |
None |
PlscClusteringP2P.v2 |
['pol'] |
Clustering |
s2s |
[Academic, Written] |
None |
None |
PlscClusteringS2S.v2 |
['pol'] |
Clustering |
s2s |
[Academic, Written] |
None |
None |
PoemSentimentClassification (Emily Sheng, 2020) |
['eng'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
PolEmo2.0-IN |
['pol'] |
Classification |
s2s |
[Written, Social] |
None |
None |
PolEmo2.0-OUT |
['pol'] |
Classification |
s2s |
[Written, Social] |
None |
None |
PpcPC (Sławomir Dadas, 2022) |
['pol'] |
PairClassification |
s2s |
[Fiction, Non-fiction, Web, Written, Spoken, Social, News] |
None |
None |
PublicHealthQA |
['ara', 'eng', 'fra', 'kor', 'rus', 'spa', 'vie', 'zho'] |
Retrieval |
s2p |
[Medical, Government, Web, Written] |
None |
None |
PunjabiNewsClassification (Anoop Kunchukuttan, 2020) |
['pan'] |
Classification |
s2s |
[News, Written] |
None |
None |
QBQTC |
['cmn'] |
STS |
s2s |
|
None |
None |
Quail (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
Quora-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2s |
|
None |
None |
Quora-PLHardNegatives (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2s |
|
None |
None |
QuoraRetrieval (DataCanary et al., 2017) |
['eng'] |
Retrieval |
s2s |
|
None |
None |
QuoraRetrievalHardNegatives (DataCanary et al., 2017) |
['eng'] |
Retrieval |
s2s |
|
None |
None |
RARbCode (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Programming, Written] |
None |
None |
RARbMath (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
RTE3 |
['deu', 'eng', 'fra', 'ita'] |
PairClassification |
s2s |
[News, Web, Encyclopaedic, Written] |
None |
None |
RUParaPhraserSTS (Pivovarova et al., 2017) |
['rus'] |
STS |
s2s |
[News, Written] |
None |
None |
RedditClustering.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
s2s |
[Web, Social, Written] |
None |
None |
RedditClusteringP2P.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
p2p |
[Web, Social, Written] |
None |
None |
RestaurantReviewSentimentClassification (ElSahar et al., 2015) |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
RiaNewsRetrieval (Gavrilov et al., 2019) |
['rus'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
RiaNewsRetrievalHardNegatives (Gavrilov et al., 2019) |
['rus'] |
Retrieval |
s2p |
[News, Written] |
None |
None |
Robust04InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
None |
None |
RomaTalesBitextMining |
['hun', 'rom'] |
BitextMining |
s2s |
[Fiction, Written] |
None |
None |
RomaniBibleClustering |
['rom'] |
Clustering |
p2p |
[Religious, Written] |
None |
None |
RomanianReviewsSentiment (Anca Maria Tache, 2021) |
['ron'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
RomanianSentimentClassification (Dumitrescu et al., 2020) |
['ron'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
RonSTS (Dumitrescu et al., 2021) |
['ron'] |
STS |
s2s |
[News, Social, Web, Written] |
None |
None |
RuBQReranking (Ivan Rybin, 2021) |
['rus'] |
Reranking |
s2p |
[Encyclopaedic, Written] |
None |
None |
RuBQRetrieval (Ivan Rybin, 2021) |
['rus'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
RuReviewsClassification (Sergey Smetanin, 2019) |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
None |
None |
RuSTSBenchmarkSTS (Philip May, 2021) |
['rus'] |
STS |
s2s |
[News, Social, Web, Written] |
None |
None |
RuSciBenchGRNTIClassification |
['rus'] |
Classification |
p2p |
[Academic, Written] |
None |
None |
RuSciBenchGRNTIClusteringP2P |
['rus'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'average_text_length': 889.81, 'average_labels_per_text': 1.0, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} |
RuSciBenchOECDClassification |
['rus'] |
Classification |
p2p |
[Academic, Written] |
None |
None |
RuSciBenchOECDClusteringP2P |
['rus'] |
Clustering |
p2p |
[Academic, Written] |
None |
None |
SCDBPAccountabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDBPAuditsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDBPCertificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDBPTrainingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDBPVerificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDDAccountabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDDAuditsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDDCertificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDDTrainingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCDDVerificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SCIDOCS (Arman Cohan, 2020) |
['eng'] |
Retrieval |
s2p |
[Academic, Written, Non-fiction] |
None |
None |
SCIDOCS-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
SIB200Classification (Adelani et al., 2023) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
Classification |
s2s |
[News, Written] |
None |
None |
SIB200ClusteringS2S (Adelani et al., 2023) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
Clustering |
s2s |
[News, Written] |
None |
None |
SICK-BR-PC |
['por'] |
PairClassification |
s2s |
[Web, Written] |
None |
None |
SICK-BR-STS |
['por'] |
STS |
s2s |
[Web, Written] |
None |
None |
SICK-E-PL |
['pol'] |
PairClassification |
s2s |
|
None |
None |
SICK-R |
['eng'] |
STS |
s2s |
|
None |
None |
SICK-R-PL |
['pol'] |
STS |
s2s |
[Web, Written] |
None |
None |
SICKFr |
['fra'] |
STS |
s2s |
|
None |
None |
SIQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
SKQuadRetrieval |
['slk'] |
Retrieval |
s2s |
[Encyclopaedic] |
None |
None |
SNLHierarchicalClusteringP2P (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
SNLHierarchicalClusteringS2S (Navjord et al., 2023) |
['nob'] |
Clustering |
s2s |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
SNLRetrieval (Navjord et al., 2023) |
['nob'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
SRNCorpusBitextMining (Zwennicker et al., 2022) |
['nld', 'srn'] |
BitextMining |
s2s |
[Social, Web, Written] |
None |
None |
STS12 (Agirre et al., 2012) |
['eng'] |
STS |
s2s |
[Encyclopaedic, News, Written] |
{'test': 3108} |
{'test': {'num_samples': 3108, 'number_of_characters': 402118, 'average_sentence1_len': 63.79, 'average_sentence2_len': 65.59, 'avg_score': 3.51}} |
STS13 (Eneko Agirre, 2013) |
['eng'] |
STS |
s2s |
[Web, News, Non-fiction, Written] |
None |
None |
STS14 |
['eng'] |
STS |
s2s |
[Blog, Web, Spoken] |
None |
None |
STS15 |
['eng'] |
STS |
s2s |
[Blog, News, Web, Written, Spoken] |
None |
None |
STS16 |
['eng'] |
STS |
s2s |
[Blog, Web, Spoken] |
None |
None |
STS17 |
['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] |
STS |
s2s |
[News, Web, Written] |
{'test': 5346} |
{'test': {'num_samples': 5346, 'number_of_characters': 400264, 'average_sentence1_len': 38.15, 'average_sentence2_len': 36.73, 'avg_score': 2.36, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'average_sentence1_len': 31.99, 'average_sentence2_len': 32.44, 'avg_score': 2.47}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'average_sentence1_len': 32.21, 'average_sentence2_len': 32.78, 'avg_score': 2.22}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'average_sentence1_len': 42.36, 'average_sentence2_len': 32.7, 'avg_score': 2.14}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'average_sentence1_len': 43.95, 'average_sentence2_len': 44.76, 'avg_score': 2.28}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'average_sentence1_len': 43.95, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'average_sentence1_len': 41.92, 'average_sentence2_len': 41.6, 'avg_score': 2.13}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'average_sentence1_len': 50.84, 'average_sentence2_len': 42.02, 'avg_score': 2.15}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'average_sentence1_len': 49.84, 'average_sentence2_len': 51.22, 'avg_score': 2.23}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'average_sentence1_len': 49.62, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'average_sentence1_len': 50.03, 'average_sentence2_len': 42.72, 'avg_score': 2.28}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'average_sentence1_len': 46.82, 'average_sentence2_len': 42.72, 'avg_score': 2.28}}}} |
STS22.v2 |
['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'pol', 'rus', 'spa', 'tur'] |
STS |
p2p |
[News, Written] |
None |
None |
STSB (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
STSBenchmark (Philip May, 2021) |
['eng'] |
STS |
s2s |
|
None |
None |
STSBenchmarkMultilingualSTS (Philip May, 2021) |
['cmn', 'deu', 'eng', 'fra', 'ita', 'nld', 'pol', 'por', 'rus', 'spa'] |
STS |
s2s |
[News, Social, Web, Spoken, Written] |
None |
None |
STSES (Agirre et al., 2015) |
['spa'] |
STS |
s2s |
[Written] |
None |
None |
SadeemQuestionRetrieval |
['ara'] |
Retrieval |
s2p |
[Written] |
None |
None |
SanskritShlokasClassification |
['san'] |
Classification |
s2s |
[Religious, Written] |
None |
None |
ScalaClassification |
['dan', 'nno', 'nob', 'swe'] |
Classification |
s2s |
[Fiction, News, Non-fiction, Blog, Spoken, Web, Written] |
None |
None |
SciDocsRR |
['eng'] |
Reranking |
s2s |
[Academic, Non-fiction, Written] |
None |
None |
SciFact (Arman Cohan, 2020) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
SciFact-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
None |
SemRel24STS (Nedjma Ousidhoum, 2024) |
['afr', 'amh', 'arb', 'arq', 'ary', 'eng', 'hau', 'hin', 'ind', 'kin', 'mar', 'tel'] |
STS |
s2s |
[Spoken, Written] |
None |
None |
SensitiveTopicsClassification |
['rus'] |
MultilabelClassification |
s2s |
[Web, Social, Written] |
None |
None |
SentimentAnalysisHindi (Shantipriya Parida, 2023) |
['hin'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
SinhalaNewsClassification (Nisansa de Silva, 2015) |
['sin'] |
Classification |
s2s |
[News, Written] |
None |
None |
SinhalaNewsSourceClassification (Dhananjaya et al., 2022) |
['sin'] |
Classification |
s2s |
[News, Written] |
None |
None |
SiswatiNewsClassification (Madodonga et al., 2023) |
['ssw'] |
Classification |
s2s |
[News, Written] |
None |
None |
SlovakHateSpeechClassification |
['slk'] |
Classification |
s2s |
[Social, Written] |
{'test': 1319} |
{'test': {'num_samples': 1319, 'number_of_characters': 122279, 'average_text_length': 92.71, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}} |
SlovakMovieReviewSentimentClassification (Štefánik et al., 2023) |
['slk'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
SlovakSumRetrieval |
['slk'] |
Retrieval |
s2s |
[News, Social, Web, Written] |
None |
None |
SouthAfricanLangClassification (ExploreAI Academy et al., 2022) |
['afr', 'eng', 'nbl', 'nso', 'sot', 'ssw', 'tsn', 'tso', 'ven', 'xho', 'zul'] |
Classification |
s2s |
[Web, Non-fiction, Written] |
None |
None |
SpanishNewsClassification |
['spa'] |
Classification |
s2s |
[News, Written] |
None |
None |
SpanishNewsClusteringP2P |
['spa'] |
Clustering |
p2p |
|
None |
None |
SpanishPassageRetrievalS2P |
['spa'] |
Retrieval |
s2p |
|
None |
None |
SpanishPassageRetrievalS2S |
['spa'] |
Retrieval |
s2s |
|
None |
None |
SpanishSentimentClassification |
['spa'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
SpartQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
SprintDuplicateQuestions |
['eng'] |
PairClassification |
s2s |
[Programming, Written] |
None |
None |
StackExchangeClustering.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
s2s |
[Web, Written] |
None |
None |
StackExchangeClusteringP2P.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
p2p |
[Web, Written] |
None |
None |
StackOverflowDupQuestions (Xueqing Liu, 2018) |
['eng'] |
Reranking |
s2s |
|
None |
None |
StackOverflowQA (Xiangyang Li, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 21925} |
{'test': {'number_of_characters': 2506.11, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'average_document_length': 0.06, 'average_query_length': 0.65, 'average_relevant_docs_per_query': 1.0}} |
StatcanDialogueDatasetRetrieval |
['eng', 'fra'] |
Retrieval |
s2p |
[Government, Web, Written] |
None |
None |
SummEvalFrSummarization.v2 (Fabbri et al., 2020) |
['fra'] |
Summarization |
p2p |
[News, Written] |
None |
None |
SummEvalSummarization.v2 (Fabbri et al., 2020) |
['eng'] |
Summarization |
p2p |
[News, Written] |
None |
None |
SwahiliNewsClassification |
['swa'] |
Classification |
s2s |
[News, Written] |
None |
None |
SweFaqRetrieval (Berdičevskis et al., 2023) |
['swe'] |
Retrieval |
s2s |
[Government, Non-fiction, Written] |
None |
None |
SweRecClassification |
['swe'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
SwedishSentimentClassification |
['swe'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
SwednClusteringP2P (Monsen et al., 2021) |
['swe'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
None |
None |
SwednClusteringS2S (Monsen et al., 2021) |
['swe'] |
Clustering |
s2s |
[News, Non-fiction, Written] |
None |
None |
SwednRetrieval (Monsen et al., 2021) |
['swe'] |
Retrieval |
p2p |
[News, Non-fiction, Written] |
None |
None |
SwissJudgementClassification (Joel Niklaus, 2022) |
['deu', 'fra', 'ita'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
SyntecReranking (Mathieu Ciancone, 2024) |
['fra'] |
Reranking |
s2p |
[Legal, Written] |
None |
None |
SyntecRetrieval (Mathieu Ciancone, 2024) |
['fra'] |
Retrieval |
s2p |
[Legal, Written] |
None |
None |
SyntheticText2SQL (Meyer et al., 2024) |
['eng', 'sql'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 111702} |
{'test': {'number_of_characters': 210.98, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'average_document_length': 0.0, 'average_query_length': 0.01, 'average_relevant_docs_per_query': 1.0}} |
T2Reranking (Xiaohui Xie, 2023) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
T2Retrieval (Xiaohui Xie, 2023) |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
TERRa (Shavrina et al., 2020) |
['rus'] |
PairClassification |
s2s |
[News, Web, Written] |
None |
None |
TNews |
['cmn'] |
Classification |
s2s |
|
None |
None |
TRECCOVID (Kirk Roberts, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
TRECCOVID-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Academic, Non-fiction, Written] |
None |
None |
TV2Nordretrieval |
['dan'] |
Retrieval |
p2p |
[News, Non-fiction, Written] |
None |
None |
TamilNewsClassification (Anoop Kunchukuttan, 2020) |
['tam'] |
Classification |
s2s |
[News, Written] |
None |
None |
Tatoeba (Tatoeba community, 2021) |
['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] |
BitextMining |
s2s |
[Written] |
None |
None |
TbilisiCityHallBitextMining |
['eng', 'kat'] |
BitextMining |
s2s |
[News, Written] |
None |
None |
TelemarketingSalesRuleLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
TeluguAndhraJyotiNewsClassification |
['tel'] |
Classification |
s2s |
[News, Written] |
None |
None |
TempReasonL1 (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL2Context (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL2Fact (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL2Pure (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL3Context (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL3Fact (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TempReasonL3Pure (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
TenKGnadClassification |
['deu'] |
Classification |
p2p |
[News, Written] |
None |
None |
TenKGnadClusteringP2P.v2 |
['deu'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
None |
None |
TenKGnadClusteringS2S.v2 |
['deu'] |
Clustering |
s2s |
[News, Non-fiction, Written] |
None |
None |
TextualismToolDictionariesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
TextualismToolPlainLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
ThuNewsClusteringP2P.v2 (Sun et al., 2016) |
['cmn'] |
Clustering |
p2p |
[News, Written] |
None |
None |
ThuNewsClusteringS2S.v2 (Sun et al., 2016) |
['cmn'] |
Clustering |
s2s |
[News, Written] |
None |
None |
TopiOCQA (Vaibhav Adlakha, 2022) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
TopiOCQAHardNegatives (Vaibhav Adlakha, 2022) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
Touche2020Retrieval.v3 |
['eng'] |
Retrieval |
s2p |
[Academic] |
{'test': 303781} |
{'test': {'number_of_characters': 2140.82, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'average_document_length': 0.01, 'average_query_length': 0.89, 'average_relevant_docs_per_query': 34.94}} |
ToxicChatClassification (Zi Lin, 2023) |
['eng'] |
Classification |
s2s |
[Constructed, Written] |
None |
None |
ToxicConversationsClassification (cjadams, 2019) |
['eng'] |
Classification |
s2s |
[Social, Written] |
None |
None |
TswanaNewsClassification (Vukosi Marivate, 2023) |
['tsn'] |
Classification |
s2s |
[News, Written] |
None |
None |
TurHistQuadRetrieval (Soygazi et al., 2021) |
['tur'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Academic, Written] |
None |
None |
TurkicClassification |
['bak', 'kaz', 'kir'] |
Classification |
s2s |
[News, Written] |
None |
None |
TurkishMovieSentimentClassification (Erkin Demirtas, 2013) |
['tur'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
TurkishProductSentimentClassification (Erkin Demirtas, 2013) |
['tur'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
TweetEmotionClassification (Al-Khatib et al., 2018) |
['ara'] |
Classification |
s2s |
[Social, Written] |
None |
None |
TweetSarcasmClassification |
['ara'] |
Classification |
s2s |
[Social, Written] |
None |
None |
TweetSentimentClassification |
['ara', 'deu', 'eng', 'fra', 'hin', 'ita', 'por', 'spa'] |
Classification |
s2s |
[Social, Written] |
None |
None |
TweetSentimentExtractionClassification (Maggie et al., 2020) |
['eng'] |
Classification |
s2s |
[Social, Written] |
None |
None |
TweetTopicSingleClassification |
['eng'] |
Classification |
s2s |
[Social, News, Written] |
None |
None |
TwentyNewsgroupsClustering.v2 (Ken Lang, 1995) |
['eng'] |
Clustering |
s2s |
[News, Written] |
None |
None |
TwitterHjerneRetrieval (Holm et al., 2024) |
['dan'] |
Retrieval |
p2p |
[Social, Written] |
None |
None |
TwitterSemEval2015 |
['eng'] |
PairClassification |
s2s |
|
None |
None |
TwitterURLCorpus |
['eng'] |
PairClassification |
s2s |
|
{'test': 51534} |
{'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'avg_sentence1_len': 79.49, 'avg_sentence2_len': 88.55, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} |
UCCVCommonLawLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
UkrFormalityClassification |
['ukr'] |
Classification |
s2s |
[News, Written] |
None |
None |
UnfairTOSLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
None |
None |
UrduRomanSentimentClassification (Sharf, Zareen, 2018) |
['urd'] |
Classification |
s2s |
[Social, Written] |
None |
None |
VGHierarchicalClusteringP2P (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
None |
None |
VGHierarchicalClusteringS2S (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
None |
None |
VideoRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
None |
VieMedEVBitextMining (Nhu Vo, 2024) |
['eng', 'vie'] |
BitextMining |
s2s |
[Medical, Written] |
None |
None |
VieQuADRetrieval |
['vie'] |
Retrieval |
s2p |
[Encyclopaedic, Non-fiction, Written] |
None |
None |
VieStudentFeedbackClassification (Nguyen et al., 2018) |
['vie'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
VoyageMMarcoReranking (Benjamin Clavié, 2023) |
['jpn'] |
Reranking |
s2s |
[Academic, Non-fiction, Written] |
None |
None |
WRIMEClassification |
['jpn'] |
Classification |
s2s |
[Social, Written] |
None |
None |
Waimai (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
WebLINXCandidatesReranking (Xing Han Lù, 2024) |
['eng'] |
Reranking |
p2p |
[Academic, Web, Written] |
None |
None |
WikiCitiesClustering |
['eng'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
None |
None |
WikiClusteringP2P.v2 |
['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
None |
None |
WikipediaRerankingMultilingual |
['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] |
Reranking |
s2p |
[Encyclopaedic, Written] |
{'test': 24000} |
{'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'avg_query_len': 59.09, 'avg_positive_len': 385.45, 'avg_negative_len': 381.24, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 60.83, 'avg_positive_len': 375.89, 'avg_negative_len': 374.19}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 47.27, 'avg_positive_len': 394.59, 'avg_negative_len': 393.98}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.27, 'avg_positive_len': 383.84, 'avg_negative_len': 368.25}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 56.75, 'avg_positive_len': 351.68, 'avg_negative_len': 344.46}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 70.0, 'avg_positive_len': 391.54, 'avg_negative_len': 399.27}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 68.37, 'avg_positive_len': 451.73, 'avg_negative_len': 453.14}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 48.67, 'avg_positive_len': 347.7, 'avg_negative_len': 344.84}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.34, 'avg_positive_len': 394.71, 'avg_negative_len': 377.84}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 50.78, 'avg_positive_len': 420.38, 'avg_negative_len': 409.52}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 
'num_negative': 12000, 'avg_query_len': 70.05, 'avg_positive_len': 396.97, 'avg_negative_len': 393.33}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.34, 'avg_positive_len': 380.79, 'avg_negative_len': 375.03}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 65.12, 'avg_positive_len': 404.02, 'avg_negative_len': 397.55}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 61.97, 'avg_positive_len': 346.71, 'avg_negative_len': 348.59}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.67, 'avg_positive_len': 386.35, 'avg_negative_len': 384.06}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 55.29, 'avg_positive_len': 367.72, 'avg_negative_len': 366.84}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'avg_query_len': 57.73, 'avg_positive_len': 372.59, 'avg_negative_len': 368.94}}}} |
WikipediaRetrievalMultilingual |
['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
None |
WinoGrande (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
None |
None |
WisesightSentimentClassification |
['tha'] |
Classification |
s2s |
[Social, News, Written] |
None |
None |
XMarket (Bonab et al., 2021) |
['deu', 'eng', 'spa'] |
Retrieval |
s2p |
|
None |
None |
XNLI (Conneau et al., 2018) |
['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] |
PairClassification |
s2s |
[Non-fiction, Fiction, Government, Written] |
{'test': 19110, 'validation': 19110} |
{'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'avg_sentence1_len': 103.24, 'avg_sentence2_len': 48.89, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'avg_sentence1_len': 89.57, 'avg_sentence2_len': 41.99, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'avg_sentence1_len': 110.02, 'avg_sentence2_len': 51.63, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'avg_sentence1_len': 119.93, 'avg_sentence2_len': 56.79, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'avg_sentence1_len': 119.05, 'avg_sentence2_len': 56.93, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'avg_sentence1_len': 105.67, 'avg_sentence2_len': 49.8, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 'avg_sentence1_len': 115.43, 'avg_sentence2_len': 54.68, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'avg_sentence1_len': 121.1, 'avg_sentence2_len': 58.58, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'avg_sentence1_len': 104.63, 'avg_sentence2_len': 50.17, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'avg_sentence1_len': 110.77, 'avg_sentence2_len': 52.45, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 
'number_of_characters': 210103, 'avg_sentence1_len': 104.44, 'avg_sentence2_len': 49.48, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'avg_sentence1_len': 96.69, 'avg_sentence2_len': 44.54, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'avg_sentence1_len': 103.68, 'avg_sentence2_len': 49.19, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'avg_sentence1_len': 111.31, 'avg_sentence2_len': 52.46, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'avg_sentence1_len': 33.04, 'avg_sentence2_len': 15.73, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'avg_sentence1_len': 103.21, 'avg_sentence2_len': 49.02, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'avg_sentence1_len': 88.32, 'avg_sentence2_len': 41.61, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'avg_sentence1_len': 109.2, 'avg_sentence2_len': 51.97, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'avg_sentence1_len': 119.81, 'avg_sentence2_len': 57.37, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'avg_sentence1_len': 119.88, 'avg_sentence2_len': 56.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 
'avg_sentence1_len': 105.72, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'avg_sentence1_len': 115.17, 'avg_sentence2_len': 55.12, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'avg_sentence1_len': 121.76, 'avg_sentence2_len': 59.09, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'avg_sentence1_len': 105.06, 'avg_sentence2_len': 50.44, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'avg_sentence1_len': 109.75, 'avg_sentence2_len': 52.27, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'avg_sentence1_len': 104.32, 'avg_sentence2_len': 49.88, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'avg_sentence1_len': 97.28, 'avg_sentence2_len': 43.84, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'avg_sentence1_len': 102.97, 'avg_sentence2_len': 49.64, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'avg_sentence1_len': 112.26, 'avg_sentence2_len': 52.43, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'avg_sentence1_len': 33.41, 'avg_sentence2_len': 15.85, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} |
XNLIV2 (Upadhyay et al., 2023) |
['asm', 'ben', 'bho', 'ell', 'guj', 'kan', 'mar', 'ory', 'pan', 'rus', 'san', 'tam', 'tur'] |
PairClassification |
s2s |
[Non-fiction, Fiction, Government, Written] |
None |
None |
XPQARetrieval (Shen et al., 2023) |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'pol', 'por', 'spa', 'tam'] |
Retrieval |
s2p |
[Reviews, Written] |
None |
None |
XQuADRetrieval (Mikel Artetxe, 2019) |
['arb', 'deu', 'ell', 'eng', 'hin', 'ron', 'rus', 'spa', 'tha', 'tur', 'vie', 'zho'] |
Retrieval |
s2p |
[Web, Written] |
None |
None |
XStance |
['deu', 'fra', 'ita'] |
PairClassification |
s2s |
[Social, Written] |
None |
None |
YahooAnswersTopicsClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Web, Written] |
None |
None |
YelpReviewFullClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Reviews, Written] |
None |
None |
YueOpenriceReviewClassification (Xiang et al., 2019) |
['yue'] |
Classification |
s2s |
[Reviews, Spoken] |
None |
None |
indonli |
['ind'] |
PairClassification |
s2s |
[Encyclopaedic, Web, News, Written] |
None |
None |
mFollowIRCrossLingualInstructionRetrieval (Weller et al., 2024) |
['eng', 'fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
{'test': 121758} |
{'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'average_document_length': 2331.08, 'average_query_length': 81.88, 'average_instruction_length': 389.95, 'average_changed_instruction_length': 450.55, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'average_document_length': 3145.5, 'average_query_length': 80.08, 'average_instruction_length': 396.88, 'average_changed_instruction_length': 463.18, 'average_relevant_docs_per_query': 10.85, 'average_top_ranked_per_query': 1000.0}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'average_document_length': 2784.08, 'average_query_length': 81.88, 'average_instruction_length': 371.12, 'average_changed_instruction_length': 431.8, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'average_document_length': 1082.05, 'average_query_length': 83.56, 'average_instruction_length': 401.02, 'average_changed_instruction_length': 456.26, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} |
mFollowIRInstructionRetrieval (Weller et al., 2024) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
{'test': 121758} |
{'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'average_document_length': 2331.08, 'average_query_length': 57.11, 'average_instruction_length': 281.07, 'average_changed_instruction_length': 326.94, 'average_relevant_docs_per_query': 10.43, 'average_top_ranked_per_query': 1000.0, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'average_document_length': 3145.5, 'average_query_length': 72.65, 'average_instruction_length': 358.93, 'average_changed_instruction_length': 415.32, 'average_relevant_docs_per_query': 10.85, 'average_top_ranked_per_query': 1000.0}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109523683, 'average_document_length': 2784.08, 'average_query_length': 77.5, 'average_instruction_length': 387.0, 'average_changed_instruction_length': 458.0, 'average_relevant_docs_per_query': 9.78, 'average_top_ranked_per_query': 1000.0}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44504935, 'average_document_length': 1082.05, 'average_query_length': 23.7, 'average_instruction_length': 110.09, 'average_changed_instruction_length': 122.81, 'average_relevant_docs_per_query': 10.65, 'average_top_ranked_per_query': 1000.0}}}} |