# config.toml — forked from chigkim/Ollama-MMLU-Pro
[server]
# OpenAI-compatible API endpoint of the model server under test
# (e.g. a local vLLM or Ollama instance).
url = "http://localhost:8000/v1"
# Placeholder credential — replace with a real key if the endpoint requires one.
api_key = "api key"
# Model name passed to the API.
model = "llama3"
# Free-form label recorded alongside results to identify the variant tested.
# NOTE(review): presumably indicates a LoRA adapter is loaded — confirm with the runner.
model_note = "LORA"
# Per-request timeout; presumably seconds — TODO confirm against the client library.
timeout = 600.0
[inference]
# Settings below are from evaluate_from_local.py for VLLM on TIGER-AI-Lab/MMLU-Pro.
temperature = 0.0
top_p = 1.0 # not specified but default for VLLM
max_tokens = 1024
stop = "<|eot_id|>" # Change to the stop token of the model being tested
# The placeholder {subject} is replaced with the appropriate value at runtime.
system_prompt = "You are an expert that knows everything. You are tasked with answering a multiple-choice question. The following is a multiple choice question (with answers) about {subject}. Give your final answer in the format of `The answer is (chosen answer)`."
# Indonesian system prompt
# system_prompt = "Anda adalah seseorang yang pintar dan mengetahui segalanya. Anda diberi perintah untuk menjawab pertanyaan pilihan ganda. Berikut adalah pertanyaan pilihan ganda tentang {subject}. Jawab dengan format: 'Jawabannya adalah (pilihan jawaban)'."
# Japanese system prompt
# system_prompt = "あなたはすべてを知っている専門家です。多肢選択式の質問に回答することが求められます。次は、{subject} に関する多肢選択式の質問です。最終的な回答を「答えは (選択した回答) です」という形式で入力してください。"
# Korean system prompt
# system_prompt = "당신은 모든 것을 아는 전문가입니다. 당신은 객관식 질문에 답하는 임무를 맡고 있습니다. 다음은 {subject}에 관한 객관식 질문(답변 포함)입니다. 최종 답변은 '대답은 (선택된 답변)' 형식으로 작성해 주세요."
# Chinese system prompt
# system_prompt = "你是一位无所不知的专家。你的任务是回答一道多项选择题。以下是一道关于{subject}的多项选择题(附答案)。请以“答案是(选定答案)”的格式给出你的最终答案。"
# Prompt style:
# "single_chat" inserts all the COT examples and the question into a single message.
#   Default style for the GPT-4O script, but raises many format issues, especially for small models.
# "multi_chat" inserts COT examples as multi-turn messages. Use for instruct/chat models.
# "no_chat" uses the v1/completions API. Use for non-instruct/chat models.
style = "multi_chat"
[test]
# MMLU-Pro subject categories to evaluate; remove entries to run a subset.
categories = ['biology', 'business', 'chemistry', 'computer science', 'economics', 'engineering', 'health', 'history', 'law', 'math', 'philosophy', 'physics', 'psychology', 'other']
# NOTE(review): presumably the number of concurrent requests — confirm with the runner.
parallel = 16
# Whether to use chain-of-thought prompting.
cot = true
# Supported languages = en, id, ja, ko, zh
language = "en"
[log]
# Verbosity level, from 0 (quiet) to 2 (most detailed).
verbosity = 0
# If true, logs the exact prompt sent to the model in the test result files.
log_prompt = true