# Dataset source: a copy of `cais/mmlu` with no auxiliary_train split.
dataset_path: "hails/mmlu_no_train"
# Split names used for the test set and for few-shot examples.
test_split: "test"
fewshot_split: "dev"
fewshot_config:
  # NOTE(review): sampler semantics are defined by the consuming harness;
  # presumably selects the leading examples of `fewshot_split` -- confirm.
  sampler: "first_n"
output_type: "generate_until"
# Prompt template (Jinja). Renders the stripped `question` and the four entries
# of `choices` as options A-D, then asks for step-by-step reasoning that ends
# with 'The answer is (<letter>)'. Written as a literal block scalar (`|-`) so
# each prompt line is one source line and no trailing newline is added.
doc_to_text: |-
  Given the following question and four candidate answers (A, B, C and D), choose the answer.
  Question: {{question.strip()}}
  A. {{choices[0]}}
  B. {{choices[1]}}
  C. {{choices[2]}}
  D. {{choices[3]}}
  Think step by step. Your response should end with 'The answer is ([the_answer_letter])' where the [the_answer_letter] is one of A, B, C and D.
# Disabled alternative: the same prompt without the step-by-step instruction.
# doc_to_text: "Given the following question and four candidate answers (A, B, C and D), choose the answer.\nQuestion: {{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\n"
# Target template: uses `answer` as an index into ['A', 'B', 'C', 'D'].
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
# Generation settings for the step-by-step prompt.
generation_kwargs:
  # Token budget for the reasoning plus the final answer sentence.
  # NOTE(review): upstream lm-evaluation-harness documents `max_gen_toks` for
  # this limit; confirm that the backend in use honours `max_tokens`.
  max_tokens: 3072
  # Stop only at the end-of-sequence marker. A "\n" stop sequence would end
  # generation at the first line break, so a multi-line step-by-step response
  # would be cut off before the closing 'The answer is (...)' sentence that the
  # answer-extraction regex in `filter_list` relies on.
  until:
    - "</s>"
# Scoring: exact match, configured to ignore case and punctuation, aggregated
# with the mean; a higher score is better.
metric_list:
  - metric: "exact_match"
    ignore_case: true
    ignore_punctuation: true
    aggregation: "mean"
    higher_is_better: true
# Post-processing pipeline applied to each model response before scoring.
filter_list:
  - name: "get_response"
    filter:
      # Capture the text that follows the literal "The answer is ".
      # NOTE(review): `mmlu_regex` is not defined in this file; presumably a
      # custom regex filter registered by the harness -- confirm.
      - function: "mmlu_regex"
        regex_pattern: "(?<=The answer is )(.*)"
      # Disabled: keep only the text before the first line break.
      # - function: "regex"
      #   regex_pattern: "^(.*?)(?=\\n|$)"
      # Remove leading whitespace.
      - function: "remove_whitespace"
      # Disabled: drop trailing whitespace and line breaks.
      # - function: "regex"
      #   regex_pattern: "^(.*?)\\s*$"
      # NOTE(review): presumably keeps the first filtered response -- confirm.
      - function: "take_first"
# Task metadata. `version` is written as a plain scalar, so it parses as the
# float 3.0 rather than a string.
metadata:
  version: 3.0
# Extra keyword arguments for dataset loading.
# NOTE(review): `trust_remote_code: true` presumably allows the dataset
# repository's own loading script to run locally -- confirm this is acceptable
# for `hails/mmlu_no_train`.
dataset_kwargs:
  trust_remote_code: true
