diff --git a/README.md b/README.md
index f27c43a24..74770e1eb 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,8 @@
 # OpenSSA: Neurosymbolic Agentic AI for Industrial Problem-Solving
 
 OpenSSA is an open-source neurosymbolic agentic AI framework
-designed to solve complex, high-stakes problems in industries like semiconductor, manufacturing and finance,
-where consistency, accuracy and deterministic outcomes are essential.
+designed to solve complex, high-stakes problems in industries like semiconductor, energy and finance,
+where consistency, accuracy and deterministic outcomes are paramount.
 
 At the core of OpenSSA is the [__Domain-Aware Neurosymbolic Agent (DANA)__](https://arxiv.org/abs/2410.02823) architecture,
 advancing generative AI from basic pattern matching and information retrieval to industrial-grade problem solving.
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
index 79277276f..96e5025de 100644
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@@ -16,10 +16,10 @@ Go straight to [OpenSSA Streamlit app](https://openssa.streamlit.app/) and start
 
 ## Getting Started as a Developer
 
-See some example user programs in the [examples/notebooks](./examples/notebooks) directory. For example, to see the sample use case on ALD semiconductor knowledge, do:
+See some example user programs in the [examples](./examples) directory. For example, to see the sample use case on semiconductor knowledge, do:
 
 ```bash
-% cd examples/notebooks
+% cd examples/semiconductor
 ```
 
 ### Common `make` targets for OpenSSA developers
diff --git a/docs/diagrams/ssm-QA-vs-PS.drawio.png b/docs/diagrams/ssm-QA-vs-PS.drawio.png
deleted file mode 100644
index b7258c66d..000000000
Binary files a/docs/diagrams/ssm-QA-vs-PS.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-class-diagram.drawio.png b/docs/diagrams/ssm-class-diagram.drawio.png
deleted file mode 100644
index 6825e32c1..000000000
Binary files a/docs/diagrams/ssm-class-diagram.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-composability.drawio.png b/docs/diagrams/ssm-composability.drawio.png
deleted file mode 100644
index b72645565..000000000
Binary files a/docs/diagrams/ssm-composability.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-full-industrial-use-case.drawio.png b/docs/diagrams/ssm-full-industrial-use-case.drawio.png
deleted file mode 100644
index 7d7a14e21..000000000
Binary files a/docs/diagrams/ssm-full-industrial-use-case.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-industrial-use-case.drawio.png b/docs/diagrams/ssm-industrial-use-case.drawio.png
deleted file mode 100644
index 343182cb4..000000000
Binary files a/docs/diagrams/ssm-industrial-use-case.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-key-components.drawio.png b/docs/diagrams/ssm-key-components.drawio.png
deleted file mode 100644
index 13770ee7d..000000000
Binary files a/docs/diagrams/ssm-key-components.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-llama-index-integration-patterns.drawio.png b/docs/diagrams/ssm-llama-index-integration-patterns.drawio.png
deleted file mode 100644
index 00a93dfb0..000000000
Binary files a/docs/diagrams/ssm-llama-index-integration-patterns.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-llama-index-integration.drawio.png b/docs/diagrams/ssm-llama-index-integration.drawio.png
deleted file mode 100644
index 557106f46..000000000
Binary files a/docs/diagrams/ssm-llama-index-integration.drawio.png and /dev/null differ
diff --git a/docs/diagrams/ssm-team-of-experts.drawio.png b/docs/diagrams/ssm-team-of-experts.drawio.png
deleted file mode 100644
index dc21e7437..000000000
Binary files a/docs/diagrams/ssm-team-of-experts.drawio.png and /dev/null differ
diff --git a/examples/FinanceBench-Lite/.env.template b/examples/FinanceBench-Lite/.env.template
new file mode 100644
index 000000000..9c9789785
--- /dev/null
+++ b/examples/FinanceBench-Lite/.env.template
@@ -0,0 +1,2 @@
+HF_API_KEY=[... HuggingFace API key if running HuggingFace-hosted models ...]
+OPENAI_API_KEY=[... OpenAI API key if running on OpenAI services ...]
diff --git a/examples/FinanceBench-Lite/.gitignore b/examples/FinanceBench-Lite/.gitignore
new file mode 100644
index 000000000..1b80d89fc
--- /dev/null
+++ b/examples/FinanceBench-Lite/.gitignore
@@ -0,0 +1,15 @@
+# data files
+.data/
+
+# environment variables
+.env
+
+# iPython/Jupyter notebooks
+*.ipynb
+
+# log files
+.log/
+*.log
+
+# Streamlit secrets
+.streamlit/secrets.toml
diff --git a/examples/FinanceBench-Lite/Makefile b/examples/FinanceBench-Lite/Makefile
new file mode 100644
index 000000000..dc5045571
--- /dev/null
+++ b/examples/FinanceBench-Lite/Makefile
@@ -0,0 +1,42 @@
+# Targets for running the DANA agent (dana.py) on a single FinanceBench problem.
+# Usage: make <target> id=<financebench_id>, e.g. `make dana-solve id=00807`.
+
+# None of these targets produces a file of the same name, so they must always run.
+.PHONY: dana-solve dana-solve-w-knowledge dana-solve-w-prog-store dana-solve-w-knowledge-and-prog-store \
+        dana-solve-w-llama3 dana-solve-w-knowledge-w-llama3 dana-solve-w-prog-store-w-llama3 \
+        dana-solve-w-knowledge-and-prog-store-w-llama3 dana-solve-all-combos
+
+dana-solve:
+	@poetry run python dana.py ${id}
+
+dana-solve-w-knowledge:
+	@poetry run python dana.py ${id} --knowledge
+
+dana-solve-w-prog-store:
+	@poetry run python dana.py ${id} --prog-store
+
+dana-solve-w-knowledge-and-prog-store:
+	@poetry run python dana.py ${id} --knowledge --prog-store
+
+dana-solve-w-llama3:
+	@poetry run python dana.py ${id} --llama3
+
+dana-solve-w-knowledge-w-llama3:
+	@poetry run python dana.py ${id} --knowledge --llama3
+
+dana-solve-w-prog-store-w-llama3:
+	@poetry run python dana.py ${id} --prog-store --llama3
+
+dana-solve-w-knowledge-and-prog-store-w-llama3:
+	@poetry run python dana.py ${id} --knowledge --prog-store --llama3
+
+# Runs all eight configurations one after another; make stops at the first one that fails.
+dana-solve-all-combos:
+	@poetry run python dana.py ${id}
+	@poetry run python dana.py ${id} --knowledge
+	@poetry run python dana.py ${id} --prog-store
+	@poetry run python dana.py ${id} --knowledge --prog-store
+	@poetry run python dana.py ${id} --llama3
+	@poetry run python dana.py ${id} --knowledge --llama3
+	@poetry run python dana.py ${id} --prog-store --llama3
+	@poetry run python dana.py ${id} --knowledge --prog-store --llama3
diff --git a/examples/FinanceBench-Lite/README.md b/examples/FinanceBench-Lite/README.md
new file mode 100644
index 000000000..6b27245db
--- /dev/null
+++ b/examples/FinanceBench-Lite/README.md
@@ -0,0 +1,58 @@
+<!-- markdownlint-disable MD013 MD043 -->
+
+# OpenSSA-FinanceBench Lite benchmarking
+
+This is a lite version of the benchmarking of `OpenSSA` performance
+on the `FinanceBench` dataset. We will use 1 question from the dataset to demonstrate the use of `OpenSSA` with `DANA` architecture.
+
+## [`FinanceBench` Dataset](https://github.com/patronus-ai/financebench/blob/main/financebench_sample_150.csv)
+
+## Getting Started with DANA Agent
+
+Have Python 3.12 installed.
+
+__Install__ project, and update its dependencies from time to time:
+__`make install`__.
+
+Create `.env` file following the `.env.template` and fill in necessary credentials.
+
+__Solve__ the problem whose `financebench_id` is `00807`:
+__`make dana-solve id=00807`__.
+
+### Question
+
+`Does 3M have a reasonably healthy liquidity profile based on its quick ratio for Q2 of FY2023? If the quick ratio is not relevant to measure liquidity, please state that and explain why.`
+
+### Knowledge
+
+To solve this question, you can add knowledge related to `liquidity`. See the example below:
+
+- Liquidity Metric Formulas
+  - `(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities`
+  - `Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities`
+
+Go to `knowledge-store.txt` to add relevant knowledge yourself and see how it helps the agent to solve this question.
+
+### Program
+
+With the above-provided knowledge, the program we can provide to the agent could be as below:
+
+- Goal: To assess liquidity health of a company, calculate `quick ratio`
+  - Task: To calculate `quick ratio`, use this formula
+            `Quick Ratio` = (
+          (`Cash & Cash Equivalents` +
+           `Short-Term Investments or (Current) Marketable Securities` +
+           `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`)
+          / `(Total) Current Liabilities`
+        )
+        - Sub-task 1: What are values in dollars of `Cash & Cash Equivalents`?
+        - Sub-task 2: What are values in dollars of `Short-Term Investments or (Current) Marketable Securities`?
+        - Sub-task 3: What are values in dollars of `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`?
+        - Sub-task 4: What are values in dollars of `(Total) Current Liabilities`?
+
+Go to `program-store.yml` to see details of the program yourself! You can experiment with different plans to see how they help the agent solve the problem as well.
+
+## Advancing DANA Agent with Domain Knowledge and Program Store
+
+- To solve the question with added domain knowledge, run `make dana-solve-w-knowledge id=00807`
+- To solve the question with added domain knowledge and program store, run `make dana-solve-w-knowledge-and-prog-store id=00807`
diff --git a/examples/FinanceBench-Lite/dana.py b/examples/FinanceBench-Lite/dana.py
new file mode 100644
index 000000000..92ec4ee61
--- /dev/null
+++ b/examples/FinanceBench-Lite/dana.py
@@ -0,0 +1,155 @@
+from argparse import ArgumentParser
+from functools import cache
+
+from openssa import DANA, ProgramStore, HTP, HTPlanner, FileResource, LMConfig
+from openssa.core.util.lm.huggingface import HuggingFaceLM
+from openssa.core.util.lm.openai import OpenAILM, default_llama_index_openai_lm
+
+# pylint: disable=wrong-import-order,wrong-import-position
+from data_and_knowledge import (DocName, FbId, Answer, Doc, FB_ID_COL_NAME, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID,
+                                EXPERT_KNOWLEDGE, EXPERT_PROGRAMS, EXPERT_HTP_COMPANY_KEY, EXPERT_HTP_PERIOD_KEY)
+from util import QAFunc, enable_batch_qa_and_eval, log_qa_and_update_output_file
+
+
+@cache
+def get_main_lm(use_llama3: bool = False):
+    return HuggingFaceLM.from_defaults() if use_llama3 else OpenAILM.from_defaults()  # memoized by `@cache`
+
+
+@cache
+def get_or_create_expert_program_store(use_llama3: bool = False) -> ProgramStore:
+    """Return the (cached) program store pre-loaded with every expert program in `EXPERT_PROGRAMS`."""
+    store = ProgramStore(lm=get_main_lm(use_llama3=use_llama3))
+    for name in EXPERT_PROGRAMS:
+        expert_htp = HTP.from_dict(EXPERT_PROGRAMS[name])
+        store.add_or_update_program(name=name, description=expert_htp.task.ask, program=expert_htp)
+
+    return store
+
+
+@cache
+def get_or_create_agent(doc_name: DocName, expert_knowledge: bool = False, expert_programs: bool = False,
+                        max_depth=3, max_subtasks_per_decomp=6,
+                        use_llama3: bool = False,
+                        llama_index_openai_lm_name: str = LMConfig.OPENAI_DEFAULT_MODEL) -> DANA:
+    """Return the (cached) DANA agent for document `doc_name` under the requested configuration."""
+    # pylint: disable=too-many-arguments
+    knowledge = {EXPERT_KNOWLEDGE} if expert_knowledge else None
+    program_store = get_or_create_expert_program_store(use_llama3=use_llama3) if expert_programs else ProgramStore()
+    planner = HTPlanner(lm=get_main_lm(use_llama3=use_llama3),
+                        max_depth=max_depth, max_subtasks_per_decomp=max_subtasks_per_decomp)
+
+    # the document's local cache directory is the agent's single resource
+    doc_resource = FileResource(path=Doc(name=doc_name).dir_path,
+                                lm=default_llama_index_openai_lm(llama_index_openai_lm_name))
+
+    return DANA(knowledge=knowledge, program_store=program_store, programmer=planner, resources={doc_resource})
+
+
+@cache
+def get_or_create_adaptations(doc_name: DocName) -> dict[str, str]:  # company & period parsed from the doc name
+    return {EXPERT_HTP_COMPANY_KEY: (doc := Doc(name=doc_name)).company, EXPERT_HTP_PERIOD_KEY: doc.period}
+
+
+@enable_batch_qa_and_eval(output_name='DANA')
+@log_qa_and_update_output_file(output_name='DANA')
+def solve(fb_id: FbId) -> Answer:  # baseline: no expert knowledge, empty program store, OpenAI main LM
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id]).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge')
+def solve_with_knowledge(fb_id: FbId) -> Answer:  # baseline + expert knowledge
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wProgStore')
+@log_qa_and_update_output_file(output_name='DANA-wProgStore')
+def solve_with_program_store(fb_id: FbId) -> Answer:  # baseline + expert program store
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore')
+def solve_with_knowledge_and_program_store(fb_id: FbId) -> Answer:  # expert knowledge + expert program store
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wLlama3')
+@log_qa_and_update_output_file(output_name='DANA-wLlama3')
+def solve_with_llama3(fb_id: FbId) -> Answer:  # baseline, but with HuggingFaceLM as the main LM
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], use_llama3=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wLlama3')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama3')
+def solve_with_knowledge_with_llama3(fb_id: FbId) -> Answer:  # expert knowledge + HuggingFaceLM main LM
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, use_llama3=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wProgStore-wLlama3')
+@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama3')
+def solve_with_program_store_with_llama3(fb_id: FbId) -> Answer:  # expert programs + HuggingFaceLM main LM
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True, use_llama3=True).solve(
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore-wLlama3')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama3')
+def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:  # all three options enabled
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama3=True).solve(  # noqa: E501
+        problem=QS_BY_FB_ID[fb_id],
+        adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
+
+
+if __name__ == '__main__':
+    # Command-line usage:
+    #   python dana.py <fb_id> [--from-id] [--knowledge] [--prog-store] [--llama3]
+
+    arg_parser = ArgumentParser()
+    arg_parser.add_argument('fb_id')
+    arg_parser.add_argument('--from-id', action='store_true')
+    arg_parser.add_argument('--knowledge', action='store_true')
+    arg_parser.add_argument('--prog-store', action='store_true')
+    arg_parser.add_argument('--llama3', action='store_true')
+    args = arg_parser.parse_args()
+
+    # solver for every (--knowledge, --prog-store, --llama3) combination of flags
+    solve_funcs_by_flags: dict[tuple[bool, bool, bool], QAFunc] = {
+        # without --llama3
+        (False, False, False): solve,
+        (True, False, False): solve_with_knowledge,
+        (False, True, False): solve_with_program_store,
+        (True, True, False): solve_with_knowledge_and_program_store,
+
+        # with --llama3
+        (False, False, True): solve_with_llama3,
+        (True, False, True): solve_with_knowledge_with_llama3,
+        (False, True, True): solve_with_program_store_with_llama3,
+        (True, True, True): solve_with_knowledge_and_program_store_with_llama3,
+    }
+
+    solve_func: QAFunc = solve_funcs_by_flags[args.knowledge, args.prog_store, args.llama3]
+
+    # the ID may be given bare (e.g. `00807`) or already prefixed with the ID column name;
+    # normalize it to the prefixed form
+    fb_id: FbId = args.fb_id
+    if not fb_id.startswith(FB_ID_COL_NAME):
+        fb_id = f'{FB_ID_COL_NAME}_{fb_id}'
+
+    # with --from-id the ID is passed on as `from:<id>`
+    # NOTE(review): presumably interpreted by `enable_batch_qa_and_eval` in util.py -- confirm
+    solve_func(f'from:{fb_id}' if args.from_id else fb_id)
diff --git a/examples/FinanceBench-Lite/data_and_knowledge.py b/examples/FinanceBench-Lite/data_and_knowledge.py
new file mode 100644
index 000000000..7dbf1e41e
--- /dev/null
+++ b/examples/FinanceBench-Lite/data_and_knowledge.py
@@ -0,0 +1,332 @@
+from __future__ import annotations
+
+from collections import Counter
+from dataclasses import dataclass, field
+import base64
+from enum import StrEnum
+from functools import cached_property
+from pathlib import Path
+from typing import TypedDict, Required, NotRequired, Literal, TYPE_CHECKING
+
+from dotenv import load_dotenv
+from pandas import DataFrame, read_json, read_csv
+import requests
+import yaml
+
+if TYPE_CHECKING:
+    from openssa.core.planning.hierarchical.plan import HTPDict
+
+
+load_dotenv()
+
+
+type DocName = str
+type FbId = str
+type Question = str
+type Answer = str
+type ExpertPlanId = str
+
+
+class Category(StrEnum):  # question categories recorded under the `category` key of each ground truth
+    RETRIEVE = '0-RETRIEVE'
+    COMPARE = '1-COMPARE'
+    CALC_CHANGE = '2-CALC-CHANGE'
+    CALC_COMPLEX = '3-CALC-COMPLEX'
+    CALC_AND_JUDGE = '4-CALC-AND-JUDGE'
+    EXPLAIN_FACTORS = '5-EXPLAIN-FACTORS'
+    OTHER_ADVANCED = '6-OTHER-ADVANCED'
+
+
+type GroundTruth = TypedDict('GroundTruth', {'sector': Required[str],
+
+                                             'company': Required[str],
+                                             'period': Required[int],
+                                             'doc-type': Required[str],
+                                             'doc': Required[DocName],
+
+                                             'question-type': Required[str],
+                                             'question-reasoning': Required[str],
+                                             'domain-question-num': Required[str | None],
+                                             'question': Required[Question],
+
+                                             'answer': Required[Answer],
+                                             'justification': Required[str],
+                                             'page(s)-0based': Required[int],
+                                             'page(s)': Required[str],
+
+                                             'category': Required[Category],
+                                             'correctness': Required[str],
+                                             'answer-inadequate': NotRequired[Literal[True]],
+                                             'evaluator-unreliable': NotRequired[Literal[True]]},
+                             total=False)
+
+
+type RAGGroundTruths = TypedDict('RAGGroundTruths', {'defs': Required[dict[str, str]],
+                                                     'ground-truths': Required[dict[str,  # doc
+                                                                                    dict[str,  # statement
+                                                                                         dict[str,  # line item
+                                                                                              dict[int | str,  # period
+                                                                                                   str  # ground truth
+                                                                                                   ]]]]]})
+
+
+NON_BOT_REQUEST_HEADERS: dict[str, str] = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+}
+
+
+REPO_RAW_CONTENT_URL_PREFIX: str = 'https://raw.githubusercontent.com/patronus-ai/financebench'
+DOC_INFO_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/main/data/financebench_document_information.jsonl'
+METADATA_JSONL_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/main/data/financebench_open_source.jsonl'
+METADATA_CSV_URL: str = f'{REPO_RAW_CONTENT_URL_PREFIX}/641ae9ece2cae93c671cf59c2d53742b51c7f1aa/financebench_sample_150.csv'
+
+FB_ID_COL_NAME: str = 'financebench_id'
+
+META_DF: DataFrame = (read_json(METADATA_JSONL_URL,
+                                orient='records', typ='frame',
+                                dtype=True, convert_axes=True,
+                                convert_dates=True, keep_default_dates=True,
+                                precise_float=False, date_unit=None,
+                                encoding='utf-8', encoding_errors='strict',
+                                lines=True, chunksize=None,
+                                compression=None, nrows=None,
+                                storage_options=None,
+                                dtype_backend='pyarrow', engine='ujson')
+
+                      .merge(right=read_json(
+                                DOC_INFO_URL,
+                                orient='records', typ='frame',
+                                dtype=True, convert_axes=True,
+                                convert_dates=True, keep_default_dates=True,
+                                precise_float=False, date_unit=None,
+                                encoding='utf-8', encoding_errors='strict',
+                                lines=True, chunksize=None,
+                                compression=None, nrows=None,
+                                storage_options=None,
+                                dtype_backend='pyarrow', engine='ujson'),
+
+                             how='left', on='doc_name',  # left_on='doc_name', right_on='doc_name',
+                             left_index=False, right_index=False,
+                             sort=False,
+                             suffixes=('', '_'),
+                             copy=False,
+                             indicator=False,
+                             validate=None  # TODO: 'many_to_one' after Patronus AI fixes FOOTLOCKER_2022_annualreport
+                             )
+
+                      .set_index(keys=FB_ID_COL_NAME,
+                                 drop=True, append=False,
+                                 inplace=False,
+                                 verify_integrity=True))
+
+META_DF.fillna(value='', method=None, axis=None, inplace=True, limit=None)  # replace PyArrow NAs
+
+LEGACY_META_DF: DataFrame = read_csv(METADATA_CSV_URL,
+                                     sep=',',  # delimiter=',',
+                                     header='infer', names=None, index_col=FB_ID_COL_NAME, usecols=None,
+                                     dtype=None, engine='pyarrow', converters=None, true_values=None, false_values=None,
+                                     skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None,
+                                     na_values=None, na_filter=None, keep_default_na=True,
+                                     skip_blank_lines=True,
+                                     parse_dates=False, date_format=None, dayfirst=False, cache_dates=True,
+                                     iterator=False, chunksize=None, compression=None,
+                                     thousands=None, decimal='.',
+                                     lineterminator=None,
+                                     quotechar=None, quoting=0, doublequote=True,
+                                     escapechar=None, comment=None,
+                                     encoding='utf-8', encoding_errors='strict',
+                                     dialect=None,
+                                     on_bad_lines='error',
+                                     low_memory=True, memory_map=False,
+                                     float_precision=None,
+                                     storage_options=None,
+                                     dtype_backend='pyarrow')
+
+assert (META_DF.index == LEGACY_META_DF.index).all()
+# assert (META_DF.doc_name == LEGACY_META_DF.doc_name).all()  # J&J docs have been fixed
+assert (META_DF.doc_period == LEGACY_META_DF.doc_period).all()
+assert (META_DF.doc_link == LEGACY_META_DF.doc_link).all()
+assert (META_DF.question_type == LEGACY_META_DF.question_type).all()
+assert (META_DF.question == LEGACY_META_DF.question).all()
+# assert (META_DF.answer == LEGACY_META_DF.answer).all()  # 01107 answer has been fixed
+
+DOC_NAMES: list[DocName] = sorted(META_DF.doc_name.unique())
+DOC_LINKS_BY_NAME: dict[DocName, str] = dict(zip(META_DF.doc_name, META_DF.doc_link))
+DOC_NAMES_BY_FB_ID: dict[FbId, DocName] = META_DF.doc_name.to_dict()
+
+FB_IDS: list[FbId] = META_DF.index.to_list()
+FB_IDS_BY_DOC_NAME: dict[DocName, list[FbId]] = META_DF.groupby('doc_name').apply(lambda _: _.index.to_list())
+
+QS_BY_FB_ID: dict[FbId, Question] = META_DF.question.to_dict()
+
+
+LOCAL_CACHE_DIR_PATH: Path = Path(__file__).parent / '.data'
+LOCAL_CACHE_DOCS_DIR_PATH: Path = LOCAL_CACHE_DIR_PATH / 'docs'
+OUTPUT_FILE_PATH: Path = LOCAL_CACHE_DIR_PATH / 'output.csv'
+
+
+GROUND_TRUTHS_FILE_PATH = Path(__file__).parent / 'ground-truths.yml'
+with open(file=GROUND_TRUTHS_FILE_PATH,
+          buffering=-1,
+          encoding='utf-8',
+          errors='strict',
+          newline=None,
+          closefd=True,
+          opener=None) as f:
+    GROUND_TRUTHS: dict[FbId, GroundTruth] = yaml.safe_load(stream=f)
+
+N_CASES: int = len(GROUND_TRUTHS)
+CAT_DISTRIB: Counter[Category] = Counter(ground_truth['category'] for ground_truth in GROUND_TRUTHS.values())
+
+
+EXPERT_KNOWLEDGE_FILE_PATH: Path = Path(__file__).parent / 'knowledge-store.txt'
+with open(file=EXPERT_KNOWLEDGE_FILE_PATH,
+          buffering=-1,
+          encoding='utf-8',
+          errors='strict',
+          newline=None,
+          closefd=True,
+          opener=None) as f:
+    EXPERT_KNOWLEDGE: str = f.read()
+
+
+EXPERT_PROGRAMS_FILE_PATH: Path = Path(__file__).parent / 'program-store.yml'
+with open(file=EXPERT_PROGRAMS_FILE_PATH,
+          buffering=-1,
+          encoding='utf-8',
+          errors='strict',
+          newline=None,
+          closefd=True,
+          opener=None) as f:
+    EXPERT_PROGRAMS: dict[ExpertPlanId, HTPDict] = yaml.safe_load(stream=f)
+
+EXPERT_HTP_COMPANY_KEY: str = 'COMPANY'
+EXPERT_HTP_PERIOD_KEY: str = 'PERIOD'
+
+
+RAG_GROUND_TRUTHS_FILE_PATH: Path = Path(__file__).parent / 'rag-ground-truths.yml'
+with open(file=RAG_GROUND_TRUTHS_FILE_PATH,
+          buffering=-1,
+          encoding='utf-8',
+          errors='strict',
+          newline=None,
+          closefd=True,
+          opener=None) as f:
+    RAG_GROUND_TRUTHS: RAGGroundTruths = yaml.safe_load(stream=f)
+
+
+@dataclass
+class Doc:
+    """A FinanceBench document, named `<company>_<period>_<type>`, with a lazily downloaded local PDF copy."""
+
+    name: DocName
+    company: str = field(init=False, repr=False)
+    period: str = field(init=False, repr=False)
+    type: str = field(init=False, repr=False)
+
+    def __post_init__(self):
+        # only the first two underscores split, so the document type may itself contain underscores
+        self.company, self.period, self.type = self.name.split(sep='_', maxsplit=2)
+
+    def request(self) -> requests.Response:
+        """Request the PDF from its source link, falling back to the FinanceBench repo on connection errors."""
+        doc_link: str = DOC_LINKS_BY_NAME[self.name]
+        if (marker := '?pdfTarget=') in doc_link:  # real PDF URL is base64-encoded after this marker
+            url: str = base64.b64decode(doc_link.split(sep=marker, maxsplit=-1)[-1]).decode(encoding='utf-8')
+        else:
+            url: str = doc_link
+
+        try:
+            response: requests.Response = requests.get(url=url, timeout=60, stream=True)
+        except requests.exceptions.ConnectionError:  # source site unreachable: use the repo's copy of the PDF
+            url = f'{REPO_RAW_CONTENT_URL_PREFIX}/main/pdfs/{self.name}.pdf'
+            response = requests.get(url=url, timeout=60, stream=True)
+
+        # retry once with browser-like headers if what came back is not declared as a PDF
+        if response.headers.get('Content-Type') != 'application/pdf':
+            response = requests.get(url=url, headers=NON_BOT_REQUEST_HEADERS, timeout=60, stream=True)
+
+        return response
+
+    @cached_property
+    def dir_path(self) -> Path:
+        """Return the document's local cache directory, downloading the PDF into it on first use."""
+        dir_path: Path = LOCAL_CACHE_DOCS_DIR_PATH / self.name
+
+        if not (file_path := dir_path / f'{self.name}.pdf').is_file():
+            dir_path.mkdir(parents=True, exist_ok=True)
+            response: requests.Response = self.request()
+            response.raise_for_status()  # do not cache an HTTP error page as if it were the PDF
+
+            with open(file=file_path, mode='wb') as f:
+                f.write(response.content)
+
+        return dir_path
+
+    @cached_property
+    def file_path(self) -> Path:
+        return self.dir_path / f'{self.name}.pdf'
+
+
+def create_or_update_ground_truths() -> dict[FbId, GroundTruth]:  # (re)build the ground-truths file from META_DF
+    ground_truths: dict[FbId, GroundTruth] = {fb_id: {'sector': row.gics_sector,
+                                                      'company': row.company, 'period': row.doc_period, 'doc-type': row.doc_type,
+                                                      'doc': row.doc_name,
+                                                      'question-type': row.question_type,
+                                                      'question-reasoning': row.question_reasoning,
+                                                      'domain-question-num': row.domain_question_num,
+                                                      'question': row.question,
+                                                      'answer': row.answer, 'justification': row.justification,
+                                                      'page(s)-0based': row.evidence[0]['evidence_page_num']}
+                                              for fb_id, row in META_DF.iterrows()}
+    # carry over keys found only in the existing file (i.e. absent from the freshly built records)
+    if GROUND_TRUTHS_FILE_PATH.is_file():
+        with open(file=GROUND_TRUTHS_FILE_PATH,
+                  buffering=-1,
+                  encoding='utf-8',
+                  errors='strict',
+                  newline=None,
+                  closefd=True,
+                  opener=None) as f:
+            existing_ground_truths: dict[FbId, GroundTruth] = yaml.safe_load(stream=f)
+
+        for fb_id, ground_truth in ground_truths.items():
+            if (existing_ground_truth := existing_ground_truths.get(fb_id)):
+                for existing_key in set(existing_ground_truth).difference(ground_truth):
+                    ground_truth[existing_key] = existing_ground_truth[existing_key]
+    # write the merged records back, overwriting the file
+    with open(file=GROUND_TRUTHS_FILE_PATH,
+              mode='w',
+              buffering=-1,
+              encoding='utf-8',
+              errors='strict',
+              newline=None,
+              closefd=True,
+              opener=None) as f:
+        yaml.safe_dump(data=ground_truths,
+                       stream=f,
+                       default_style=None,
+                       default_flow_style=False,
+                       canonical=None,
+                       indent=2,
+                       width=80,
+                       allow_unicode=True,
+                       line_break=None,
+                       encoding='utf-8',
+                       explicit_start=None,
+                       explicit_end=None,
+                       version=None,
+                       tags=None,
+                       sort_keys=False)
+
+    return ground_truths
+
+
+def get_or_create_output_df() -> DataFrame:
+    """Load the saved output table if it exists, else start one from META_DF's doc names, questions and answers."""
+    output_df: DataFrame = (read_csv(OUTPUT_FILE_PATH, index_col=FB_ID_COL_NAME)
+                            if OUTPUT_FILE_PATH.is_file()
+                            else META_DF[['doc_name', 'question', 'answer']].copy())  # own copy: safe to assign into
+
+    output_df.loc[:, 'category'] = [GROUND_TRUTHS[fb_id]['category'] for fb_id in output_df.index]
+    return output_df
diff --git a/examples/FinanceBench-Lite/eval.py b/examples/FinanceBench-Lite/eval.py
new file mode 100644
index 000000000..77f491f4f
--- /dev/null
+++ b/examples/FinanceBench-Lite/eval.py
@@ -0,0 +1,301 @@
+from __future__ import annotations
+
+import argparse
+from collections import defaultdict
+from functools import cache
+from pprint import pprint
+from typing import TYPE_CHECKING
+
+from dotenv import load_dotenv
+from loguru import logger
+from pandas import DataFrame, notna, read_csv
+from tqdm import tqdm
+
+from openssa.core.util.lm.config import LMConfig
+from openssa.core.util.lm.openai import OpenAILM
+
+# pylint: disable=wrong-import-order
+from data_and_knowledge import (FbId, Question, Answer, Category, GroundTruth,
+                                FB_ID_COL_NAME, GROUND_TRUTHS, N_CASES, CAT_DISTRIB,
+                                LOCAL_CACHE_DIR_PATH, OUTPUT_FILE_PATH, get_or_create_output_df)
+from log import switch_log_file
+
+if TYPE_CHECKING:
+    from openssa.core.util.lm.abstract import AbstractLM
+
+
+EVAL_PROMPT_TEMPLATE: str = \
+"""You shall act as a judge of question-answering correctness.
+
+Given the posed QUESTION below, evaluate whether the ANSWER below is correct
+according to the criteria specified in the CORRECTNESS EVALUATION RUBRIC below.
+
+- The evaluation should regard the ANSWER as responding to the QUESTION,
+  and hence the ANSWER does not need to repeat contextual information already in the QUESTION;
+
+- The evaluation should follow the RUBRIC strictly,
+  not looking for in the ANSWER more elaboration/explanation than what the RUBRIC explicitly requires;
+
+- Financial and technical terminology can be treated as case-insensitive.
+
+Output only a single word, either:
+- YES: if you judge the ANSWER to be correct; or
+- NO: if you judge the ANSWER to be incorrect.
+
+QUESTION:
+---------
+```
+{question}
+```
+
+ANSWER TO EVALUATE:
+-------------------
+```
+{answer}
+```
+
+CORRECTNESS EVALUATION RUBRIC:
+------------------------------
+```
+{rubric}
+```
+"""  # noqa: E122
+
+
+load_dotenv()
+
+
+@cache  # memoised per `model` argument, so repeated calls reuse the same OpenAILM instance
+def get_lm(model='gpt-4o') -> AbstractLM:
+    return OpenAILM(model=model, api_key=LMConfig.OPENAI_API_KEY, api_base=LMConfig.OPENAI_API_URL)
+
+
+def human_eval_recommended(fb_id: FbId) -> bool | None:  # truthy if flagged 'answer-inadequate' or 'evaluator-unreliable'
+    return (ground_truth := GROUND_TRUTHS[fb_id]).get('answer-inadequate') or ground_truth.get('evaluator-unreliable')
+
+
+def eval_correctness(fb_id: FbId, answer: Answer, output_name: str | None = None,  # pylint: disable=too-many-arguments
+                     n_times: int = 9, human: bool = True, debug: bool = False) -> bool:
+    """Have the LM judge `answer` against the case's rubric up to `n_times`; the first NO ends the loop.
+
+    A NO yields False unless `human` is set and the case is flagged for human evaluation (the operator decides).
+    With `output_name`, `switch_log_file` is called first and the verdict is saved under '{output_name}---CORRECTNESS'.
+    """
+    if output_name:
+        switch_log_file(fb_id=fb_id, output_name=output_name)
+
+    question: Question = (ground_truth := GROUND_TRUTHS[fb_id])['question']
+    rubric: str = ground_truth['correctness']
+    prompt: str = EVAL_PROMPT_TEMPLATE.format(question=question, answer=answer, rubric=rubric)
+    lm: AbstractLM = get_lm()
+
+    for _ in range(n_times):
+        score: str = ''
+        while score not in {'YES', 'NO'}:  # normalise replies such as " Yes." so they do not trigger needless retries
+            score = lm.get_response(prompt=prompt, temperature=0).strip().rstrip('.').upper()
+
+        if score == 'NO':
+            logger.warning(f'\n{fb_id}\n{ground_truth['doc']}:\n{question}\n'
+                           '\n'
+                           f'ANSWER JUDGED TO BE INCORRECT:\n{answer}\n'
+                           '\n'
+                           f'RUBRIC:\n{rubric}' +
+                           ('\n\n(*** EXPERT ANSWER KNOWN TO BE INADEQUATE ***)\n'
+                            if ground_truth.get('answer-inadequate')
+                            else '\n'))
+
+            if debug:
+                logger.debug(f'PROMPT:\n{prompt}')
+
+            if human and human_eval_recommended(fb_id=fb_id):
+                human_eval_str: str = ''
+                while not human_eval_str:
+                    human_eval_str = input('\n*** HUMAN EVALUATION ***: if answer is correct, type "Y": ').strip()
+                correct: bool = human_eval_str.lower().startswith('y')
+            else:
+                correct: bool = False
+            break
+
+    else:  # no round returned NO (also reached when n_times is 0)
+        correct: bool = True
+
+    if output_name:
+        output_df: DataFrame = get_or_create_output_df()
+        output_df.loc[fb_id, f'{output_name}---CORRECTNESS'] = correct
+        output_df.to_csv(OUTPUT_FILE_PATH, index=True)
+
+    return correct
+
+
+def eval_all(output_name: str, refresh: bool = True, n_times: int = 9, human: bool = True, debug: bool = False):
+    # pylint: disable=too-many-locals
+    output_df: DataFrame = get_or_create_output_df()
+    # tallies: count of correct answers per category, and incorrect case IDs mapped to a caveat note (may be '')
+    n_yes_scores_by_category: defaultdict = defaultdict(int)
+    incorrect_answer_fb_ids: dict[FbId, str] = {}
+
+    for fb_id, answer in tqdm(output_df[output_name].items(), total=N_CASES):
+        ground_truth: GroundTruth = GROUND_TRUTHS[fb_id]
+        # refresh=True re-judges via eval_correctness; otherwise the stored '---CORRECTNESS' value is reused (NaN = incorrect)
+        if (eval_correctness(fb_id=fb_id, answer=answer, output_name=output_name, n_times=n_times, human=human, debug=debug)  # noqa: E501
+                if refresh
+                else (notna(correctness := output_df.loc[fb_id, f'{output_name}---CORRECTNESS']) and correctness)):
+            n_yes_scores_by_category[ground_truth['category']] += 1
+
+        else:
+            incorrect_answer_fb_ids[fb_id]: str = ('expert answer inadequate'
+                                                   if ground_truth.get('answer-inadequate')
+                                                   else ('evaluator unreliable'
+                                                         if ground_truth.get('evaluator-unreliable')
+                                                         else ''))
+
+    logger.info(f'TOTAL CORRECT: {(n := sum(n_yes_scores_by_category.values()))} / {N_CASES} = {n / N_CASES:.1%}')
+    # per-category summary strings of the form 'n / total = pct'; this dict is also the function's return value
+    pprint(correctness_by_category := {category: (f'{(n := n_yes_scores_by_category[category])} / {n_for_category} '
+                                                  f'= {n / n_for_category:.1%}')
+                                       for category, n_for_category in CAT_DISTRIB.items()})
+    # roll-ups: EASY = RETRIEVE, COMPARE, CALC_CHANGE; HARD = CALC_COMPLEX, CALC_AND_JUDGE, EXPLAIN_FACTORS, OTHER_ADVANCED
+    pprint({
+        'EASY': (f'{(e := sum(n_yes_scores_by_category[easy_cat]
+                              for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} / '
+                 f'{(se := sum(CAT_DISTRIB[easy_cat]
+                               for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} '
+                 f'= {e / se:.1%}'),
+
+        'HARD': (f'{(h := sum(n_yes_scores_by_category[hard_cat]
+                              for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                               Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} / '
+                 f'{(sh := sum(CAT_DISTRIB[hard_cat]
+                               for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                                Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} '
+                 f'= {h / sh:.1%}')
+    })
+
+    logger.warning('INCORRECT:')
+    pprint(incorrect_answer_fb_ids)
+
+    return correctness_by_category
+
+
+def compare_eval(output_name: str, baseline_output_name: str = 'RAG-Default'):
+    """Print per-category results of `output_name` vs. the baseline; return the cases whose correctness differs."""
+    output_df: DataFrame = get_or_create_output_df()
+    baseline_summaries: dict[str, str] = eval_all(output_name=baseline_output_name, refresh=False)
+    summaries: dict[str, str] = eval_all(output_name=output_name, refresh=False)
+    pprint({category: {output_name: summary, baseline_output_name: baseline_summaries[category]}
+            for category, summary in summaries.items()})
+    # overwrite each answer column with its stored correctness column before comparing the two
+    for name in (baseline_output_name, output_name):
+        output_df.loc[:, name] = output_df[f'{name}---CORRECTNESS']
+    differing = output_df[output_name] != output_df[baseline_output_name]
+    return output_df.loc[differing, ['doc_name', 'category', baseline_output_name, output_name]]
+
+
+def eval_accuracy_and_consistency_wrt_ground_truths(output_name: str, output_file_names: list[str]):
+    # pylint: disable=too-many-locals
+    # Aggregates the stored correctness column of several output CSVs (under LOCAL_CACHE_DIR_PATH) and prints summaries.
+    n_output_files: int = len(output_file_names)
+    correctness_col_name: str = f'{output_name}---CORRECTNESS'
+
+    n_yes_scores_by_fb_id: defaultdict = defaultdict(int)
+    incorrect_answer_fb_ids: dict[FbId, str] = {}
+    # count, per case, in how many files it was marked correct; a case incorrect in any file is listed as incorrect
+    for output_df in (read_csv(LOCAL_CACHE_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
+                      for output_file_name in output_file_names):
+
+        for fb_id, correctness in output_df[correctness_col_name].items():
+            ground_truth: GroundTruth = GROUND_TRUTHS[fb_id]
+
+            if notna(correctness) and correctness:
+                n_yes_scores_by_fb_id[fb_id] += 1
+
+            else:
+                incorrect_answer_fb_ids[fb_id]: str = ('expert answer inadequate'
+                                                       if ground_truth.get('answer-inadequate')
+                                                       else ('evaluator unreliable'
+                                                             if ground_truth.get('evaluator-unreliable')
+                                                             else ''))
+    # per case: accuracy a = share of files marked correct; consistency = 2*|a - 0.5| (1 if all files agree, 0 at an even split)
+    cumu_avg_accuracy_scores_by_category: defaultdict = defaultdict(int)
+    cumu_consistency_scores_by_category: defaultdict = defaultdict(float)
+
+    for fb_id, ground_truth in GROUND_TRUTHS.items():
+        cumu_avg_accuracy_scores_by_category[cat := ground_truth['category']] += (a := n_yes_scores_by_fb_id[fb_id] / n_output_files)
+        cumu_consistency_scores_by_category[cat] += 2 * abs(a - 0.5)
+
+    print(f'TOTAL CORRECT: {(n := sum(cumu_avg_accuracy_scores_by_category.values()))} / {N_CASES} = {n / N_CASES:.1%}')
+
+    pprint({category: (f'{(n := cumu_avg_accuracy_scores_by_category[category])} / {n_for_category} '
+                       f'= {n / n_for_category:.1%}')
+            for category, n_for_category in CAT_DISTRIB.items()})
+
+    pprint({
+        'EASY': (f'{(e := sum(cumu_avg_accuracy_scores_by_category[easy_cat]
+                              for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} / '
+                 f'{(se := sum(CAT_DISTRIB[easy_cat]
+                               for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} '
+                 f'= {e / se:.1%}'),
+
+        'HARD': (f'{(h := sum(cumu_avg_accuracy_scores_by_category[hard_cat]
+                              for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                               Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} / '
+                 f'{(sh := sum(CAT_DISTRIB[hard_cat]
+                               for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                                Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} '
+                 f'= {h / sh:.1%}')
+    })
+
+    print(f'\nTOTAL CONSISTENT: {(n := sum(cumu_consistency_scores_by_category.values()))} / {N_CASES} = {n / N_CASES:.1%}')
+
+    pprint({category: (f'{(n := cumu_consistency_scores_by_category[category])} / {n_for_category} '
+                       f'= {n / n_for_category:.1%}')
+            for category, n_for_category in CAT_DISTRIB.items()})
+
+    pprint({
+        'EASY': (f'{(e := sum(cumu_consistency_scores_by_category[easy_cat]
+                              for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} / '
+                 f'{(se := sum(CAT_DISTRIB[easy_cat]
+                               for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} '
+                 f'= {e / se:.1%}'),
+
+        'HARD': (f'{(h := sum(cumu_consistency_scores_by_category[hard_cat]
+                              for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                               Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} / '
+                 f'{(sh := sum(CAT_DISTRIB[hard_cat]
+                               for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                                Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} '
+                 f'= {h / sh:.1%}')
+    })
+
+    print('\nINCORRECT:')
+    pprint(incorrect_answer_fb_ids)
+
+
+if __name__ == '__main__':
+    arg_parser = argparse.ArgumentParser()
+    # CLI: judge one answer column of the output CSV, either for every case or for a single FinanceBench case ID
+    arg_parser.add_argument('answer_col', help='Name of the column containing answers to evaluate')
+    arg_parser.add_argument('--id', default='all', help='FinanceBench Case ID')
+    arg_parser.add_argument('--n-times', type=int, default=9, help='Number of times to evaluate')
+    # paired on/off switches share one dest each; both human_eval and refresh default to ON
+    arg_parser.add_argument('--human-eval', dest='human_eval', action='store_true', help='Human Evaluation ON')
+    arg_parser.add_argument('--no-human-eval', dest='human_eval', action='store_false', help='Human Evaluation OFF')
+    arg_parser.set_defaults(human_eval=True)
+
+    arg_parser.add_argument('--refresh', dest='refresh', action='store_true', help='Evaluation Refreshing ON')
+    arg_parser.add_argument('--no-refresh', dest='refresh', action='store_false', help='Evaluation Refreshing OFF')
+    arg_parser.set_defaults(refresh=True)
+
+    arg_parser.add_argument('--debug', action='store_true', help='Debug by printing out prompts')
+
+    args = arg_parser.parse_args()
+    # any --id value containing 'all' (case-insensitive) evaluates every case; otherwise only that case is judged
+    if 'all' in args.id.lower():
+        eval_all(output_name=args.answer_col, refresh=args.refresh, n_times=args.n_times, human=args.human_eval, debug=args.debug)  # noqa: E501
+
+    else:
+        logger.info(
+            eval_correctness(fb_id=args.id,
+                             answer=read_csv(OUTPUT_FILE_PATH, index_col=FB_ID_COL_NAME).loc[args.id, args.answer_col],
+                             output_name=args.answer_col,
+                             n_times=args.n_times, human=args.human_eval, debug=args.debug))
diff --git a/examples/FinanceBench-Lite/ground-truths.yml b/examples/FinanceBench-Lite/ground-truths.yml
new file mode 100644
index 000000000..7cc0d1fc3
--- /dev/null
+++ b/examples/FinanceBench-Lite/ground-truths.yml
@@ -0,0 +1,4608 @@
+financebench_id_03029:
+  sector: Industrials
+
+  company: 3M
+  period: 2018
+  doc-type: 10k
+  doc: 3M_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is the FY2018 capital expenditure amount (in USD millions) for 3M?
+    Give a response to the question by relying on the details shown in the cash flow
+    statement.
+
+  answer: $1577.00
+  justification: 'The metric capital expenditures was directly extracted from the
+    company 10K. The line item name, as seen in the 10K, was: Purchases of property,
+    plant and equipment (PP&E).'
+  page(s)-0based: 59
+  page(s): '60'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    1577, 1577 million, 1.577 billion,
+    1600, 1600 million or 1.6 billion
+
+
+financebench_id_04672:
+  sector: Industrials
+
+  company: 3M
+  period: 2018
+  doc-type: 10k
+  doc: 3M_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: 'Assume that you are a public equities analyst. Answer the following question
+    by primarily using information that is shown in the balance sheet: what is the
+    year end FY2018 net PPNE for 3M? Answer in USD billions.'
+
+  answer: $8.70
+  justification: "The metric ppne, net was directly extracted from the company 10K.\
+    \ The line item name, as seen in the 10K, was: Property, plant and equipment \u2014\
+    \ net."
+  page(s)-0based: 57
+  page(s): '58'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    8.738, 8.738 billion, 8738 million,
+    8.7, 8.7 billion or 8700 million
+
+  evaluator-unreliable: true
+
+
+financebench_id_00499:
+  sector: Industrials
+
+  company: 3M
+  period: 2022
+  doc-type: 10k
+  doc: 3M_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning)
+  domain-question-num: dg06
+  question: Is 3M a capital-intensive business based on FY2022 data?
+
+  answer: 'No, the company is managing its CAPEX and Fixed Assets pretty efficiently,
+    which is evident from below key metrics:
+
+    CAPEX/Revenue Ratio: 5.1%
+
+    Fixed assets/Total Assets: 20%
+
+    Return on Assets= 12.4%'
+  justification: 'CAPEX/Revenue
+
+    Fixed Assets/Total Assets
+
+    ROA=Net Income/Total Assets'
+  page(s)-0based: 47
+  page(s): 48,50,52
+
+  category: 6-OTHER-ADVANCED
+  correctness: |-
+    the answer opines that 3M is actually managing capital assets efficiently, and justifies such opinion
+    by certain calculated financial ratio metric value(s) showing at least one of the following:
+    - Fixed Assets is not large as proportion of Total Assets;
+    - Capital Expenditure (CapEx) is not high relative to Revenue; and/or
+    - Return on (Total) Assets (RoA or RoTA) is quite good
+
+  evaluator-unreliable: true
+
+
+financebench_id_01226:
+  sector: Industrials
+
+  company: 3M
+  period: 2022
+  doc-type: 10k
+  doc: 3M_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical
+    reasoning OR Logical reasoning
+  domain-question-num: dg17
+  question: What drove operating margin change as of FY2022 for 3M? If operating margin
+    is not a useful metric for a company like this, then please state that and explain
+    why.
+
+  answer: "Operating Margin for 3M in FY2022 has decreased by 1.7% primarily due to:\
+    \ \n-Decrease in gross Margin\n-mostly one-off charges including Combat Arms Earplugs\
+    \ litigation, impairment related to exiting PFAS manufacturing, costs related\
+    \ to exiting Russia and divestiture-related restructuring\ncharges"
+  justification: ''
+  page(s)-0based: 26
+  page(s): '27'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions at least 1 salient change among those discussed below:
+
+    COST OF SALES:
+    Cost of sales, measured as a percent of sales, increased in 2022 when compared to the same period last year.
+    Increases were primarily due to 2022 special item costs for significant litigation from additional commitments
+    to address PFAS-related matters at 3M's Zwijndrecht, Belgium site, higher raw materials and logistics costs,
+    manufacturing productivity headwinds which were further magnified by the shutdown of certain operations in Belgium
+    and progress on restarting previously-idled operations, and investments in growth, productivity and sustainability.
+    On a percent of sales basis, these increases were partially offset by increases in selling prices.
+
+    SELLING, GENERAL AND ADMINISTRATIVE EXPENSES:
+    SG&A, measured as a percent of sales, increased in 2022 when compared to the same period last year.
+    SG&A was impacted by increased special item costs for significant litigation primarily related to steps toward
+    resolving Combat Arms Earplugs litigation resulting in a 2022 second quarter pre-tax charge of approximately $1.2 billion,
+    certain impairment costs related to exiting PFAS manufacturing, costs related to exiting Russia,
+    divestiture-related restructuring charges, and continued investment in key growth initiatives.
+    These increases were partially offset by restructuring benefits and ongoing general 3M cost management.
+
+    RESEARCH, DEVELOPMENT AND RELATED EXPENSES:
+    R&D, measured as a percent of sales, decreased in 2022 when compared to the same period last year.
+    3M continues to invest in a range of R&D activities from application development, product and manufacturing support,
+    product development and technology development aimed at disruptive innovations.
+
+    GAIN ON BUSINESS DIVESTITURES:
+    In the third quarter of 2022, 3M recorded a pre-tax gain of $2.7 billion ($2.7 billion after tax)
+    related to the split-off and combination of its Food Safety business with Neogen Corporation.
+
+    GOODWILL IMPAIRMENT EXPENSE:
+    As a result of 3M's commitment to exit per- and polyfluoroalkyl substance (PFAS) manufacturing,
+    3M recorded a goodwill impairment charge related to the Advanced Materials reporting unit
+    (within the Transportation and Electronics business).
+
+
+financebench_id_01865:  # tricky: Total Sales Change contains zero Acquisitions but non-zero Divestitures
+  sector: Industrials
+
+  company: 3M
+  period: 2022
+  doc-type: 10k
+  doc: 3M_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: If we exclude the impact of M&A, which segment has dragged down 3M's overall
+    growth in 2022?
+
+  answer: The consumer segment shrunk by 0.9% organically.
+  justification: ''
+  page(s)-0based: 24
+  page(s): '25'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Consumer segment as negative contributor
+
+
+financebench_id_00807:
+  sector: Industrials
+
+  company: 3M
+  period: 2023
+  doc-type: 10q
+  doc: 3M_2023Q2_10Q
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical
+    reasoning
+  domain-question-num: dg01
+  question: Does 3M have a reasonably healthy liquidity profile based on its quick
+    ratio for Q2 of FY2023? If the quick ratio is not relevant to measure liquidity,
+    please state that and explain why.
+
+  answer: No. The quick ratio for 3M was 0.96 by Jun'23 close, which needs a bit of
+    an improvement to touch the 1x mark
+  justification: 'Quick Ratio= (Total current assets-Total inventories)/Total current
+    liabilities
+
+    (15,754-5,280)/10,936'
+  page(s)-0based: 4
+  page(s): '5'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains a calculated Quick Ratio decimal value that is over 0.75 but less than 1.00,
+    or, alternatively, a calculated percentage value that is over 75% but less than 100%
+
+
+financebench_id_00941:
+  sector: Industrials
+
+  company: 3M
+  period: 2023
+  doc-type: 10q
+  doc: 3M_2023Q2_10Q
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg04
+  question: Which debt securities are registered to trade on a national securities
+    exchange under 3M's name as of Q2 of 2023?
+
+  answer: 'Following debt securities registered under 3M''s name are listed to trade
+    on the New York Stock Exchange:
+
+    -1.500% Notes due 2026 (Trading Symbol: MMM26)
+
+    -1.750% Notes due 2030 (Trading Symbol: MMM30)
+
+    -1.500% Notes due 2031 (Trading Symbol: MMM31)'
+  justification: ''
+  page(s)-0based: 0
+  page(s): '1'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions notes/securities due 2026, 2030 and 2031
+
+  evaluator-unreliable: true
+
+
+financebench_id_01858:
+  sector: Industrials
+
+  company: 3M
+  period: 2023
+  doc-type: 10q
+  doc: 3M_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Does 3M maintain a stable trend of dividend distribution?
+
+  answer: Yes, not only they distribute the dividends on a routine basis, 3M has also
+    been increasing the per share dividend for consecutive 65 years
+  justification: ''
+  page(s)-0based: 61
+  page(s): '62'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that dividends have been stable, and/or mentions "65 years", "65th year" or something similar
+
+  evaluator-unreliable: true
+
+
+financebench_id_02987:
+  sector: Communication Services
+
+  company: Activision Blizzard
+  period: 2019
+  doc-type: 10k
+  doc: ACTIVISIONBLIZZARD_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is the FY2019 fixed asset turnover ratio for Activision Blizzard?
+    Fixed asset turnover ratio is defined as: FY2019 revenue / (average PP&E between
+    FY2018 and FY2019). Round your answer to two decimal places. Base your judgments
+    on the information provided primarily in the statement of income and the statement
+    of financial position.'
+
+  answer: '24.26'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total revenue. This metric was located in the 10K as a single line item
+    named: Total net revenues.
+
+
+    Metric 2: Ppne, net. This metric was located in the 10K as a single line item
+    named: Property and equipment, net.'
+  page(s)-0based: 68
+  page(s): 69,70
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Fixed Asset Turnover Ratio decimal value that is in the range from 23.00 to 25.00
+    (if the answer is a single number, assume that it is that calculated Fixed Asset Turnover Ratio decimal value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_07966:
+  sector: Communication Services
+
+  company: Activision Blizzard
+  period: 2019
+  doc-type: 10k
+  doc: ACTIVISIONBLIZZARD_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2017 - FY2019 3 year average of capex as a % of revenue
+    for Activision Blizzard? Answer in units of percents and round to one decimal
+    place. Calculate (or extract) the answer from the statement of income and the
+    cash flow statement.
+
+  answer: 1.9%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Capital expenditures. This metric was located in the 10K as a single
+    line item named: Capital expenditures.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Total net revenues.'
+  page(s)-0based: 69
+  page(s): 70,73
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated percentage value that is in the range from 1.70% to 2.10%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0170 to 0.0210
+    (if the answer is a single number, assume that it is that calculated metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_04735:
+  sector: Information Technology
+
+  company: Adobe
+  period: 2015
+  doc-type: 10k
+  doc: ADOBE_2015_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'You are an investment banker and your only resource(s) to answer the
+    following question is (are): the statement of financial position and the cash
+    flow statement. Here''s the question: what is the FY2015 operating cash flow ratio
+    for Adobe? Operating cash flow ratio is defined as: cash from operations / total
+    current liabilities. Round your answer to two decimal places.'
+
+  answer: '0.66'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cash from operations. This metric was located in the 10K as a single
+    line item named: Net cash provided by operating activities.
+
+
+    Metric 2: Total current liabilities. This metric was located in the 10K as a single
+    line item named: Total current liabilities.'
+  page(s)-0based: 58
+  page(s): 59,63
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Operating Cash Flow Ratio decimal value that is in the range from 0.6000 to 0.7000,
+    or, alternatively, a calculated percentage value that is in the range from 60.00% to 70.00%
+    (if the answer is a single number, assume that it is that calculated Operating Cash Flow Ratio metric value)
+
+
+financebench_id_07507:
+  sector: Information Technology
+
+  company: Adobe
+  period: 2016
+  doc-type: 10k
+  doc: ADOBE_2016_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Adobe's year-over-year change in unadjusted operating income from
+    FY2015 to FY2016 (in units of percents and round to one decimal place)? Give a
+    solution to the question by using the income statement.
+
+  answer: 65.4%
+  justification: 'The metric unadjusted operating income was directly extracted from
+    the company 10K. The line item name, as seen in the 10K, was: Operating income.
+    The final step was to execute the desired percent change calculation on unadjusted
+    operating income.'
+  page(s)-0based: 61
+  page(s): '62'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer contains a calculated Operating Income change percentage value that is in the range from 60.0% to 70.0%
+    (if the answer is a single number, assume that it is that calculated Operating Income change percentage value)
+
+
+financebench_id_03856:
+  sector: Information Technology
+
+  company: Adobe
+  period: 2017
+  doc-type: 10k
+  doc: ADOBE_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is the FY2017 operating cash flow ratio for Adobe? Operating cash
+    flow ratio is defined as: cash from operations / total current liabilities. Round
+    your answer to two decimal places. Please utilize information provided primarily
+    within the balance sheet and the cash flow statement.'
+
+  answer: '0.83'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cash from operations. This metric was located in the 10K as a single
+    line item named: Net cash provided by operating activities.
+
+
+    Metric 2: Total current liabilities. This metric was located in the 10K as a single
+    line item named: Total current liabilities.'
+  page(s)-0based: 56
+  page(s): 57,61
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Operating Cash Flow Ratio decimal value that is in the range from 0.8000 to 0.8500,
+    or, alternatively, a calculated percentage value that is in the range from 80.00% to 85.00%
+    (if the answer is a single number, assume that it is that calculated Operating Cash Flow Ratio metric value)
+
+
+financebench_id_00438:
+  sector: Information Technology
+
+  company: Adobe
+  period: 2022
+  doc-type: 10k
+  doc: ADOBE_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR information extraction
+  domain-question-num: dg14
+  question: Does Adobe have an improving operating margin profile as of FY2022? If
+    operating margin is not a useful metric for a company like this, then state that
+    and explain why.
+
+  answer: No the operating margins of Adobe have recently declined from 36.8% in FY
+    2021 to 34.6% in FY2022. A drop by 2.2% in a year.
+  justification: '6098/16388
+
+    5802/14573'
+  page(s)-0based: 53
+  page(s): '54'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Operating Margin percentage or decimal values for 2021 and 2022,
+    and concludes that such metric decreased
+
+  evaluator-unreliable: true
+
+
+financebench_id_00591:
+  sector: Information Technology
+
+  company: Adobe
+  period: 2022
+  doc-type: 10k
+  doc: ADOBE_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Does Adobe have an improving Free cashflow conversion as of FY2022?
+
+  answer: Yes, the FCF conversion (using net income as the denominator) for Adobe
+    has improved by ~13% from 143% in 2021 to 156% in 2022
+  justification: 'FCF Conversion: (Net cash provided by operating activities - Purchases
+    of property and equipment)/Net income
+
+    (7838-442)/4756
+
+    (7230-348)/4822'
+  page(s)-0based: 56
+  page(s): '57'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Free Cash Flow Conversion Ratio percentage or decimal values for 2021 and 2022,
+    and concludes that such metric increased
+
+  evaluator-unreliable: true
+
+
+financebench_id_01319:
+  sector: Utilities
+
+  company: AES Corporation
+  period: 2022
+  doc-type: 10k
+  doc: AES_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg21
+  question: What is the quantity of restructuring costs directly outlined in AES Corporation's
+    income statements for FY2022? If restructuring costs are not explicitly outlined
+    then state 0.
+
+  answer: '0'
+  justification: ''
+  page(s)-0based: 131
+  page(s): '132'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer states 0, zero, and/or that restructuring costs are not explicitly mentioned/reported
+
+  evaluator-unreliable: true
+
+
+financebench_id_00540:
+  sector: Utilities
+
+  company: AES Corporation
+  period: 2022
+  doc-type: 10k
+  doc: AES_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg25
+  question: Roughly how many times has AES Corporation sold its inventory in FY2022?
+    Calculate inventory turnover ratio for the FY2022; if conventional inventory management
+    is not meaningful for the company then state that and explain why.
+
+  answer: AES has converted inventory 9.5 times in FY 2022.
+  justification: 'Cost of sales/Inventory
+
+    10069/1055'
+  page(s)-0based: 129
+  page(s): 130,132
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer contains a calculated Inventory Turnover Ratio (or Inventory Conversion Ratio) decimal value that is either:
+    - in the range from 9.0 to 10.0 times (implicitly using ending Inventory as denominator), or
+    - approximately 12.0 times (implicitly using average Inventory as denominator)
+    (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value)
+
+
+financebench_id_10420:
+  sector: Utilities
+
+  company: AES Corporation
+  period: 2022
+  doc-type: 10k
+  doc: AES_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'Based on the information provided primarily in the statement of financial
+    position and the statement of income, what is AES''s FY2022 return on assets (ROA)?
+    ROA is defined as: FY2022 net income / (average total assets between FY2021 and
+    FY2022). Round your answer to two decimal places.'
+  answer: '-0.02'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Net income. This metric was located in the 10K as a single line item
+    named: NET INCOME (LOSS) ATTRIBUTABLE TO THE AES CORPORATION.
+
+
+    Metric 2: Total assets. This metric was located in the 10K as a single line item
+    named: TOTAL ASSETS.'
+  page(s)-0based: 129
+  page(s): 130,132
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Return on Assets (RoA)
+    percentage value that is NEGATIVE and in the range from -2.00% to -1.40%,
+    or, alternatively, a calculated decimal value that is NEGATIVE and in the range from -0.0200 to -0.0140
+    (if the answer is a single number, assume that it is that calculated Return on Assets (RoA) metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_06655:
+  sector: Consumer Discretionary
+
+  company: Amazon
+  period: 2017
+  doc-type: 10k
+  doc: AMAZON_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is Amazon''s FY2017 days payable outstanding (DPO)? DPO is defined
+    as: 365 * (average accounts payable between FY2016 and FY2017) / (FY2017 COGS
+    + change in inventory between FY2016 and FY2017). Round your answer to two decimal
+    places. Address the question by using the line items and information shown within
+    the balance sheet and the P&L statement.'
+
+  answer: '93.86'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Accounts payable. This metric was located in the 10K as a single line
+    item named: Accounts payable.
+
+
+    Metric 2: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories.
+
+
+    Metric 3: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.'
+  page(s)-0based: 37
+  page(s): 38,40
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 90.00 to 100.00
+    (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) metric value)
+
+
+financebench_id_08135:
+  sector: Consumer Discretionary
+
+  company: Amazon
+  period: 2017
+  doc-type: 10k
+  doc: AMAZON_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Amazon's year-over-year change in revenue from FY2016 to FY2017
+    (in units of percents and round to one decimal place)? Calculate what was asked
+    by utilizing the line items clearly shown in the statement of income.
+
+  answer: 30.8%
+  justification: 'The metric total revenue was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: Total net sales. The final step
+    was to execute the desired percent change calculation on total revenue.'
+  page(s)-0based: 37
+  page(s): '38'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer contains a calculated Revenue change percentage value that is in the range from 30.0% to 31.0%
+    (if the answer is a single number, assume that it is that calculated Revenue change percentage value)
+
+
+financebench_id_08286:
+  sector: Consumer Discretionary
+
+  company: Amazon
+  period: 2019
+  doc-type: 10k
+  doc: AMAZON_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: By drawing conclusions from the information stated only in the income
+    statement, what is Amazon's FY2019 net income attributable to shareholders (in
+    USD millions)?
+
+  answer: $11588.00
+  justification: 'The metric net income was directly extracted from the company 10K.
+    The line item name, as seen in the 10K, was: Net income.'
+  page(s)-0based: 37
+  page(s): '38'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    11588, 11588 million, 11.588 billion,
+    11600, 11600 million or 11.6 billion
+
+
+financebench_id_03882:
+  sector: Materials
+
+  company: Amcor
+  period: 2020
+  doc-type: 10k
+  doc: AMCOR_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is Amcor's year end FY2020 net AR (in USD millions)? Address the
+    question by adopting the perspective of a financial analyst who can only use the
+    details shown within the balance sheet.
+
+  answer: $1616.00
+  justification: 'The metric accounts receivable, net was directly extracted from
+    the company 10K. The line item name, as seen in the 10K, was: Trade receivables,
+    net.'
+  page(s)-0based: 49
+  page(s): '50'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    1615.9, 1615.9 million,
+    1616, 1616 million, 1.616 billion,
+    1600, 1600 million or 1.6 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_01935:
+  sector: Materials
+
+  company: Amcor
+  period: 2022
+  doc-type: 8k
+  doc: AMCOR_2022_8K_dated-2022-07-01
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What was the key agenda of the AMCOR's 8k filing dated 1st July 2022?
+
+  answer: Amcor Finance (USA), Inc. and Amcor Flexibles North America, Inc., entered
+    into supplemental indentures relating to Guaranteed Senior Notes due 2026 and
+    2028. This involved the substitution of the Substitute Issuer (Amcor Flexibles
+    North America) for the Former Issuer (Amcor Finance) and the assumption of covenants
+    under the indentures. (In essence a novation agreement)
+  justification: ''
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions one of the terms "supplemental", "indenture(s)", "substitute" or "substitution"
+
+  evaluator-unreliable: true
+
+
+financebench_id_00799:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: 10k
+  doc: AMCOR_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg02
+  question: Has AMCOR's quick ratio improved or declined between FY2023 and FY2022?
+    If the quick ratio is not something that a financial analyst would ask about a
+    company like this, then state that and explain why.
+
+  answer: The quick ratio has slightly improved from 0.67 times to 0.69 times between
+    FY 2023 and FY 2022.(3.4% jump)
+  justification: 'Quick Ratio= (Total current assets-(Raw materials and supplies+Work
+    in process and finished goods))/Total current liabilities
+
+    (5308-992-1221)/4476
+
+    (5853-1114-1325)/5103'
+  page(s)-0based: 51
+  page(s): '52'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Quick Ratio decimal or percentage values for 2022 and 2023,
+    both over 0.50 but less than 0.75 (if decimal), or, alternatively, over 50% but less than 75% (if percentage);
+    the answer then concludes that such metric increased
+
+
+financebench_id_01079:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: 10k
+  doc: AMCOR_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg10
+  question: What are major acquisitions that AMCOR has done in FY2023, FY2022 and
+    FY2021?
+
+  answer: 'Amcor completed these acquisitions during FY2023:
+
+    -100% equity interest of a flexibles manufacturing company in the Czech Republic
+
+    - 100% equity interest in a medical device packaging manufacturing site in
+
+    Shanghai, China.
+
+    -acquisition of a New Zealand-based leading manufacturer of state-of-the-art,
+    automated protein
+
+    packaging machines.'
+  justification: ''
+  page(s)-0based: 63
+  page(s): '64'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions acquisitions in at least 2 of the following:
+    - Czech Republic;
+    - New Zealand; and
+    - Shanghai, China (or, alternatively, just "Shanghai" or just "China")
+
+
+financebench_id_01148:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: 10k
+  doc: AMCOR_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction OR Logical reasoning OR
+  domain-question-num: dg12
+  question: What industry does AMCOR primarily operate in?
+
+  answer: Amcor is a global leader in packaging production for various use cases.
+  justification: ''
+  page(s)-0based: 4
+  page(s): '5'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions "packaging"
+
+
+financebench_id_00684:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: 10k
+  doc: AMCOR_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR information extraction
+  domain-question-num: dg13
+  question: Does AMCOR have an improving gross margin profile as of FY2023? If gross
+    margin is not a useful metric for a company like this, then state that and explain
+    why.
+
+  answer: No. For AMCOR there has been a slight decline in gross margins by 0.8%.
+  justification: 'Gross Profit/Net Sales
+
+    2725/14694
+
+    2820/14544'
+  page(s)-0based: 49
+  page(s): '50'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Gross Margin percentage or decimal values for 2022 and 2023,
+    and concludes that such metric decreased
+  answer-inadequate: true
+
+
+financebench_id_01936:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: 10q
+  doc: AMCOR_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What is the nature & purpose of AMCOR's restructuring liability as oF
+    Q2 of FY2023 close?
+
+  answer: 87% of the total restructuring liability is related Employee liabilities.
+  justification: ''
+  page(s)-0based: 14
+  page(s): '15'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions Employee costs or Employee liabilities
+
+
+financebench_id_01928:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: Earnings
+  doc: AMCOR_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What Was AMCOR's Adjusted Non GAAP EBITDA for FY 2023
+
+  answer: AMCOR's Adj. EBITDA was $2,018mn in FY 2023
+  justification: ''
+  page(s)-0based: 11
+  page(s): '12'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    2018 million, 2.018 billion,
+    2000 million or 2 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_01930:
+  sector: Materials
+
+  company: Amcor
+  period: 2023
+  doc-type: Earnings
+  doc: AMCOR_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: How much was the Real change in Sales for AMCOR in FY 2023 vs FY 2022,
+    if we exclude the impact of FX movement, passthrough costs and one-off items?
+
+  answer: The Real Growth was flat in FY 2023 vs FY 2022.
+  justification: ''
+  page(s)-0based: 9
+  page(s): '10'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer concludes that the percentage change was approximately 1%,
+    or, alternatively, concludes that the growth was flat / small
+
+  evaluator-unreliable: true
+
+
+financebench_id_03069:
+  sector: Information Technology
+
+  company: AMD
+  period: 2015
+  doc-type: 10k
+  doc: AMD_2015_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: Answer the following question as if you are an equity research analyst
+    and have lost internet connection so you do not have access to financial metric
+    providers. According to the details clearly outlined within the P&L statement
+    and the statement of cash flows, what is the FY2015 depreciation and amortization
+    (D&A from cash flow statement) % margin for AMD?
+
+  answer: 4.2%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Net revenue.'
+  page(s)-0based: 55
+  page(s): 56,60
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Depreciation & Amortization (D&A) Margin (using Net Revenue as denominator)
+    percentage value that is in the range from 4.00% to 4.50%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0400 to 0.0450
+    (if the answer is a single number, assume that it is that calculated Depreciation & Amortization (D&A) Margin metric value)
+
+
+financebench_id_00222:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical
+    reasoning
+  domain-question-num: dg01
+  question: Does AMD have a reasonably healthy liquidity profile based on its quick
+    ratio for FY22? If the quick ratio is not relevant to measure liquidity, please
+    state that and explain why.
+
+  answer: Yes. The quick ratio is 1.57, calculated as (cash and cash equivalents+Short
+    term investments+Accounts receivable, net+receivables from related parties)/ (current
+    liabilities).
+  justification: ''
+  page(s)-0based: 55
+  page(s): '56'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains a calculated Quick Ratio decimal value that is in the range from 1.40 to 1.90,
+    or, alternatively, a calculated percentage value that is in the range from 140% to 190%
+
+
+financebench_id_00995:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg07
+  question: What are the major products and services that AMD sells as of FY22?
+
+  answer: AMD sells server microprocessors (CPUs) and graphics processing units (GPUs),
+    data processing units (DPUs), Field Programmable Gate Arrays (FPGAs), and Adaptive
+    System-on-Chip (SoC) products for data centers; CPUs, accelerated processing units
+    (APUs) that integrate CPUs and GPUs, and chipsets for desktop and notebook personal
+    computers; discrete GPUs, and semi-custom SoC products and development services;
+    and embedded CPUs, GPUs, APUs, FPGAs, and Adaptive SoC products.
+  justification: ''
+  page(s)-0based: 3
+  page(s): '4'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions at least graphics (i.e., GPU) and FPGA products
+
+  evaluator-unreliable: true
+
+
+financebench_id_01198:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg15
+  question: What drove revenue change as of the FY22 for AMD?
+
+  answer: In 2022, AMD reported Higher sales of their EPYC server processors, higher
+    semi-custom product sales, and the inclusion of Xilinx embedded product sales
+  justification: ''
+  page(s)-0based: 42
+  page(s): '43'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions at least 2 of the following:
+    - "Data Center" and/or "EPYC";
+    - "Gaming" and/or "semi-custom"; and
+    - "Embedded" and/or "Xilinx"
+
+  evaluator-unreliable: true
+
+
+financebench_id_00917:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical
+    reasoning OR Logical reasoning
+  domain-question-num: dg17
+  question: What drove operating margin change as of the FY22 for AMD? If operating
+    margin is not a useful metric for a company like this, then please state that
+    and explain why.
+
+  answer: The decrease in AMD's operating income was primarily driven by amortization
+    of intangible assets associated with the Xilinx acquisition
+  justification: ''
+  page(s)-0based: 42
+  page(s): '43'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions Xilinx
+
+
+financebench_id_01279:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg19
+  question: Among operations, investing, and financing activities, which brought in
+    the most (or lost the least) cash flow for AMD in FY22?
+
+  answer: In 2022, AMD brought in the most cashflow from Operations
+  justification: ''
+  page(s)-0based: 57
+  page(s): '58'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Operations / Operating Cash Flows as bringing in most cash
+
+
+financebench_id_00563:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: From FY21 to FY22, excluding Embedded, in which AMD reporting segment
+    did sales proportionally increase the most?
+
+  answer: Data Center
+  justification: "Data center: \nFY22: 6,043\nFY21: 3,694 \n6,043/3,694-1 = 63,59%\n\
+    \nClient: \nFY22: 6,201\nFY21: 6,887 \n6,201/6,887-1 = -9,96%\n\n\nGaming: \n\
+    FY22: 6,805\nFY21: 5,607 \n6,805/5,607-1 = 21,37%"
+  page(s)-0based: 47
+  page(s): '48'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Data Center segment as proportionally growing most strongly
+
+
+financebench_id_00757:
+  sector: Information Technology
+
+  company: AMD
+  period: 2022
+  doc-type: 10k
+  doc: AMD_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Did AMD report customer concentration in FY22?
+
+  answer: Yes, one customer accounted for 16% of consolidated net revenue
+  justification: One customer accounting for 16% of net revenue is a high customer concentration
+  page(s)-0based: 11
+  page(s): '12'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions that one or a small number of customers
+    accounted for a large portion of revenue
+
+  evaluator-unreliable: true
+
+
+financebench_id_00476:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg04
+  question: Which debt securities are registered to trade on a national securities
+    exchange under American Express' name as of 2022?
+
+  answer: There are none
+  justification: No debt securities are listed under the securities registered pursuant
+    to Section 12(b) of the Act, which implies there are none
+  page(s)-0based: 0
+  page(s): '1'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer concludes that there are no debt securities traded,
+    or, alternatively, that no such debt securities are explicitly reported
+
+  evaluator-unreliable: true
+
+
+financebench_id_01028:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg08
+  question: What are the geographies that American Express primarily operates in as
+    of 2022?
+
+  answer: United States, EMEA, APAC, and LACC
+  justification: ''
+  page(s)-0based: 154
+  page(s): '155'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions at least 3 among:
+    - United States (US);
+    - Europe, the Middle East and Africa (EMEA);
+    - Asia Pacific, Australia and New Zealand (APAC); and
+    - Latin America, Canada and the Caribbean (LACC)
+
+
+financebench_id_00723:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR information extraction
+  domain-question-num: dg14
+  question: Does AMEX have an improving operating margin profile as of 2022? If operating
+    margin is not a useful metric for a company like this, then state that and explain
+    why.
+
+  answer: Performance is not measured through operating margin
+  justification: It's a financial services company and performance is measured through
+    the Net Interest Margin.
+  page(s)-0based: 95
+  page(s): '96'
+
+  category: 6-OTHER-ADVANCED
+  correctness: >-
+    the answer argues that Operating Margin is not a very relevant/useful metric for this business model and/or industry,
+    or, alternatively, that performance in this business model and/or industry is usually not judged through Operating Margin
+
+  evaluator-unreliable: true
+
+
+financebench_id_00720:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical
+    reasoning OR Logical reasoning
+  domain-question-num: dg16
+  question: What drove gross margin change as of the FY2022 for American Express?
+    If gross margin is not a useful metric for a company like this, then please state
+    that and explain why.
+
+  answer: Performance is not measured through gross margin
+  justification: It's a financial services company and performance is measured through
+    the Net Interest Margin.
+  page(s)-0based: 95
+  page(s): '96'
+
+  category: 6-OTHER-ADVANCED
+  correctness: >-
+    the answer argues that Gross Margin is not a very relevant/useful metric for this business model and/or industry,
+    or, alternatively, that performance in this business model and/or industry is usually not judged through Gross Margin
+
+  evaluator-unreliable: true
+
+
+financebench_id_01351:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg23
+  question: How much has the effective tax rate of American Express changed between
+    FY2021 and FY2022?
+
+  answer: The effective tax rate for American Express has changed/dropped from 24.6%
+    in FY 2021 to 21.6% in FY 2022.
+  justification: ''
+  page(s)-0based: 43
+  page(s): '44'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer says Effective Tax Rate changed from 24.6% to 21.6%,
+    and/or that it decreased by 3 percentage points or 3%
+
+  evaluator-unreliable: true
+
+
+financebench_id_01964:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What was the largest liability in American Express's Balance Sheet in
+    2022?
+
+  answer: Customer deposits
+  justification: ''
+  page(s)-0based: 97
+  page(s): '98'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Customer Deposits as largest liability
+
+  evaluator-unreliable: true
+
+
+financebench_id_01981:
+  sector: Financials
+
+  company: American Express
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANEXPRESS_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Was American Express able to retain card members during 2022?
+
+  answer: 'Yes'
+  justification: ''
+  page(s)-0based: 44
+  page(s): '45'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that retention was good/high
+
+  evaluator-unreliable: true
+
+
+financebench_id_05718:
+  sector: Utilities
+
+  company: American Water Works
+  period: 2020
+  doc-type: 10k
+  doc: AMERICANWATERWORKS_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: How much (in USD billions) did American Water Works pay out in cash dividends
+    for FY2020? Compute or extract the answer by primarily using the details outlined
+    in the statement of cash flows.
+
+  answer: $0.40
+  justification: 'The metric total cash dividends paid out was directly extracted
+    from the company 10K. The line item name, as seen in the 10K, was: Dividends paid.'
+  page(s)-0based: 85
+  page(s): '86'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    0.389, 0.389 billion, 389 million,
+    0.4, 0.4 billion or 400 million
+
+
+financebench_id_04254:
+  sector: Utilities
+
+  company: American Water Works
+  period: 2021
+  doc-type: 10k
+  doc: AMERICANWATERWORKS_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: Basing your judgments off of the cash flow statement and the income statement,
+    what is American Water Works's FY2021 unadjusted operating income + depreciation
+    and amortization from the cash flow statement (unadjusted EBITDA) in USD millions?
+
+  answer: $1832.00
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization.
+
+
+    Metric 2: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating income.'
+  page(s)-0based: 85
+  page(s): 86,88
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    1832, 1832 million, 1.832 billion,
+    1800, 1800 million or 1.8 billion
+
+
+financebench_id_00070:
+  sector: Utilities
+
+  company: American Water Works
+  period: 2022
+  doc-type: 10k
+  doc: AMERICANWATERWORKS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg24
+  question: Does American Water Works have positive working capital based on FY2022
+    data? If working capital is not a useful or relevant metric for this company,
+    then please state that and explain why.
+
+  answer: No, American Water Works had negative working capital of -$1561M in FY 2022.
+  justification: 'Accounts receivable+Income tax receivable+Unbilled revenues+Materials
+    and supplies+other-Accounts payable-Accrued liabilities-Accrued taxes
+
+    334+114+275+98+312-254-706-49'
+  page(s)-0based: 80
+  page(s): 81,82
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated (Net) Working Capital metric value in dollars
+    that is NEGATIVE and equivalent to or approximately equal to
+    minus/negative 1561, minus/negative 1561 million, minus/negative 1.561 billion,
+    minus/negative 1600, minus/negative 1600 million or minus/negative 1.6 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_02608:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2017
+  doc-type: 10k
+  doc: BESTBUY_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: In agreement with the information outlined in the income statement, what
+    is the FY2015 - FY2017 3 year average net profit margin (as a %) for Best Buy?
+    Answer in units of percents and round to one decimal place.
+
+  answer: 2.8%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total revenue. This metric was located in the 10K as a single line item
+    named: Revenue.
+
+
+    Metric 2: Net income. This metric was located in the 10K as a single line item
+    named: Net earnings attributable to Best Buy Co., Inc. shareholders.'
+  page(s)-0based: 55
+  page(s): '56'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Average Net Profit Margin percentage value that is in the range from 2.50% to 3.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0250 to 0.0300
+    (if the answer is a single number, assume that it is that calculated Average Net Profit Margin metric value)
+
+
+financebench_id_04417:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2019
+  doc-type: 10k
+  doc: BESTBUY_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is the year end FY2019 total amount of inventories for Best Buy?
+    Answer in USD millions. Base your judgments on the information provided primarily
+    in the balance sheet.
+
+  answer: $5409.00
+  justification: 'The metric inventories was directly extracted from the company 10K.
+    The line item name, as seen in the 10K, was: Merchandise inventories.'
+  page(s)-0based: 51
+  page(s): '52'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    5409, 5409 million, 5.409 billion,
+    5400, 5400 million or 5.4 billion
+
+
+financebench_id_00685:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2023
+  doc-type: 10k
+  doc: BESTBUY_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical
+    reasoning
+  domain-question-num: dg03
+  question: Are Best Buy's gross margins historically consistent (not fluctuating
+    more than roughly 2% each year)? If gross margins are not a relevant metric for
+    a company like this, then please state that and explain why.
+
+  answer: Yes, the margins have been consistent, there has been a minor decline of
+    1.1% in gross margins between FY2022 and FY2023.
+  justification: 'Gross Profit/Revenue
+
+    9912/46298
+
+    11640/51761'
+  page(s)-0based: 39
+  page(s): '40'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Gross Margin
+    percentage values for 2022 and 2023 that are within 2 percentage points (or 2%) of each other,
+    or, alternatively, calculated decimal values that are within 0.02 of each other
+  answer-inadequate: true
+
+
+financebench_id_01077:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2023
+  doc-type: 10k
+  doc: BESTBUY_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg10
+  question: What are major acquisitions that Best Buy has done in FY2023, FY2022 and
+    FY2021?
+
+  answer: 'Best Buy closed two acquisitions, both these companies were already partially
+    owned by Best Buy, but Best Buy acquired all outstanding shares of these two companies
+    during FY 2022: (1) Current Health Ltd and (2) Two Peaks, LLC d/b/a Yardbird Furniture'
+  justification: ''
+  page(s)-0based: 50
+  page(s): '51'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions Current Health and Two Peaks (which is also alternatively called Yardbird)
+
+
+financebench_id_01275:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2023
+  doc-type: 10k
+  doc: BESTBUY_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg19
+  question: Among operations, investing, and financing activities, which brought in
+    the most (or lost the least) cash flow for Best Buy in FY2023?
+
+  answer: Best Buy generated the most cash flow from operating activities in FY 2023
+    ($1.8 bn)
+  justification: ''
+  page(s)-0based: 41
+  page(s): '42'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Operations / Operating Cash Flows as bringing in the most cash
+
+
+financebench_id_00288:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2024
+  doc-type: 10q
+  doc: BESTBUY_2024Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Was there any drop in Cash & Cash equivalents between FY 2023 and Q2 of
+    FY2024?
+
+  answer: Yes, there was a decline of ~42% between FY2023 and Q2 of FY 2024.
+  justification: 1093/1874-1
+  page(s)-0based: 19
+  page(s): '20'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer affirms that Cash & Cash Equivalents decreased
+
+
+financebench_id_00460:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2024
+  doc-type: 10q
+  doc: BESTBUY_2024Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Was there any change in the number of Best Buy stores between Q2 of FY2024
+    and FY2023?
+
+  answer: Yes, there is decline in number stores by 1.32% from 982 stores in Q2 FY
+    2023 to 969 by the end of Q2 FY2024.
+  justification: 969/982-1
+  page(s)-0based: 16
+  page(s): '17'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer mentions that number of stores decreased
+
+  evaluator-unreliable: true
+
+
+financebench_id_01902:
+  sector: Consumer Discretionary
+
+  company: Best Buy
+  period: 2024
+  doc-type: 10q
+  doc: BESTBUY_2024Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which Best Buy product category performed the best (by top line) in the
+    domestic (USA) Market during Q2 of FY2024?
+
+  answer: The entertainment segment experienced the highest growth of 9% during Q2
+    FY2024, primarily from gaming division.
+  justification: ''
+  page(s)-0based: 17
+  page(s): '18'
+
+  category: 1-COMPARE
+  correctness: |-
+    the answer either:
+    - identifies Entertainment (or Gaming) category/segment as proportionally growing most; or
+    - identifies Computing and Mobile Phones category/segment as having highest revenue
+
+  evaluator-unreliable: true
+
+
+financebench_id_04660:
+  sector: Information Technology
+
+  company: Block
+  period: 2016
+  doc-type: 10k
+  doc: BLOCK_2016_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: Considering the data in the balance sheet, what is Block's (formerly known
+    as Square) FY2016 working capital ratio? Define working capital ratio as total
+    current assets divided by total current liabilities. Round your answer to two
+    decimal places.
+
+  answer: '1.73'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total current liabilities. This metric was located in the 10K as a single
+    line item named: Total current liabilities.
+
+
+    Metric 2: Total current assets. This metric was located in the 10K as a single
+    line item named: Total current assets.'
+  page(s)-0based: 67
+  page(s): '68'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Working Capital Ratio decimal value that is in the range from 1.70 to 1.80,
+    or, alternatively, a calculated percentage value that is in the range from 170% to 180%
+    (if the answer is a single number, assume that it is that calculated Working Capital Ratio metric value)
+
+
+financebench_id_03838:
+  sector: Information Technology
+
+  company: Block
+  period: 2020
+  doc-type: 10k
+  doc: BLOCK_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2019 - FY2020 total revenue growth rate for Block (formerly
+    known as Square)? Answer in units of percents and round to one decimal place.
+    Approach the question asked by assuming the standpoint of an investment banking
+    analyst who only has access to the statement of income.
+
+  answer: 101.5%
+  justification: 'The metric total revenue was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: Total net revenue. The final
+    step was to execute the desired percent change calculation on total revenue.'
+  page(s)-0based: 85
+  page(s): '86'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer contains a calculated Revenue growth percentage value that is over 100.0%
+    (if the answer is a single number, assume that it is that calculated Revenue growth percentage value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_07661:
+  sector: Information Technology
+
+  company: Block
+  period: 2020
+  doc-type: 10k
+  doc: BLOCK_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: 'Using the cash flow statement, answer the following question to the best
+    of your abilities: how much did Block (formerly known as Square) generate in cash
+    flow from operating activities in FY2020? Answer in USD millions.'
+
+  answer: $382.00
+  justification: 'The metric cash from operations was directly extracted from the
+    company 10K. The line item name, as seen in the 10K, was: Net cash provided by
+    operating activities.'
+  page(s)-0based: 89
+  page(s): '90'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    381.6, 381.6 million, 0.3816 billion,
+    382, 382 million, 0.382 billion,
+    400, 400 million or 0.4 billion
+
+
+financebench_id_10285:
+  sector: Industrials
+
+  company: Boeing
+  period: 2018
+  doc-type: 10k
+  doc: BOEING_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: 'We need to calculate a financial metric by using information only provided
+    within the balance sheet. Please answer the following question: what is Boeing''s
+    year end FY2018 net property, plant, and equipment (in USD millions)?'
+
+  answer: $12645.00
+  justification: 'The metric ppne, net was directly extracted from the company 10K.
+    The line item name, as seen in the 10K, was: Property, plant and equipment, net.'
+  page(s)-0based: 51
+  page(s): '52'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    12645, 12645 million, 12.645 billion,
+    12600, 12600 million or 12.6 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_00517:
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning)
+  domain-question-num: dg09
+  question: Are there any product categories / service categories that represent more
+    than 20% of Boeing's revenue for FY2022?
+
+  answer: Yes. Boeing has product and service categories that represent more than
+    20% of Boeing's revenue for FY2022. These categories are Commercial Airplanes
+    which comprises 39% of total revenue, Defence which comprises 35% of total revenue
+    and Services which comprises 26% of total revenue.
+  justification: 'Commercial Airplanes%=Revenues: Commercial Airplanes/Total revenues*100=25,867/66,608*100=39%.
+    Defence%=Defense, Space & Security/Total revenues*100=23,162/66,608*100=35%. Services%=Global
+    Services/Total revenues*100=17,611/66,608*100=26%.'
+  page(s)-0based: 61
+  page(s): '62'
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer mentions at least 1 of the following categories:
+    - Commercial Airplanes;
+    - Defense/Defence (or fully written "Defense, Space & Security"); or
+    - Services (or fully written "Global Services")
+
+  evaluator-unreliable: true
+
+
+financebench_id_01091:
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg11
+  question: Has Boeing reported any materially important ongoing legal battles from
+    FY2022?
+
+  answer: Yes. Multiple lawsuits have been filed against Boeing resulting from a 2018
+    Lion Air crash and a 2019 Ethiopian Airlines crash.
+  justification: ''
+  page(s)-0based: 112
+  page(s): '113'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that there have been material lawsuits / legal battles
+
+  evaluator-unreliable: true
+
+
+financebench_id_00678:  # note: Gross Income is implicit, with missing label
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR information extraction
+  domain-question-num: dg13
+  question: Does Boeing have an improving gross margin profile as of FY2022? If gross
+    margin is not a useful metric for a company like this, then state that and explain
+    why.
+
+  answer: Yes. Boeing has an improving gross margin profile as of FY2022. Gross profit
+    improved from $3,017 million in FY2021 to $3,502 million in FY2022. Gross margin
+    % improved from 4.8% in FY2021 to 5.3% in FY2022.
+  justification: Gross margin%=Gross margin/Total revenues*100=3,502/66,608*100=5.3%
+    for 2022 and 3,017/62,286*100=4.8% for 2021.
+  page(s)-0based: 54
+  page(s): '55'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains calculated Gross Margin percentage or decimal values for 2021 and 2022,
+    and concludes that such metric increased
+
+  evaluator-unreliable: true
+
+
+financebench_id_01290:
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction OR Logical reasoning
+  domain-question-num: dg20
+  question: Who are the primary customers of Boeing as of FY2022?
+
+  answer: Boeing's primary customers as of FY2022 are a limited number of commercial
+    airlines and the US government. The US government accounted for 40% of Boeing's
+    total revenues in FY2022.
+  justification: ''
+  page(s)-0based: 7
+  page(s): 8, 10, 14
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions airlines and government(s) / military(ies)
+
+  evaluator-unreliable: true
+
+
+financebench_id_00464:
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Is Boeing's business subject to cyclicality?
+
+  answer: Yes, Boeing's business is subject to cyclicality due to its exposure to
+    the airline industry which is a cyclical industry.
+  justification: A major portion of Boeing's revenue is derived from the sale of aircraft
+    to commercial airlines. The commercial airlines business is cyclical, and subject
+    to significant profit swings.
+  page(s)-0based: 7
+  page(s): '8'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that cyclicality is present
+
+
+financebench_id_00494:
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What production rate changes is Boeing forecasting for FY2023?
+
+  answer: Boeing forecasts an increase in the production rates for the 737, 777X and
+    787 aircrafts in 2023.
+  justification: Boeing plans to gradually increase production rates for the 737 and
+    787 and to resume production of 777X.
+  page(s)-0based: 8
+  page(s): '9'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions increase(s) in production rate(s)
+
+
+financebench_id_00585:  # note: correct number signs
+  sector: Industrials
+
+  company: Boeing
+  period: 2022
+  doc-type: 10k
+  doc: BOEING_2022_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: How does Boeing's effective tax rate in FY2022 compare to FY2021?
+
+  answer: Effective tax rate in FY2022 was 0.62%, compared to  -14.76% in FY2021.
+  justification: Effective tax rate=Income tax (expense) benefit/ Loss before income
+    taxes*100=(31)/(5,022)*100=0.62% in 2022 and 743/(5,033)*100=-14.76%.
+  page(s)-0based: 54
+  page(s): '55'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains calculated Effective Tax Rate percentage or decimal values for 2021 and 2022,
+    with one value being negative and the other value being positive
+
+  evaluator-unreliable: true
+
+
+financebench_id_03473:
+  sector: Consumer Staples
+
+  company: Coca-Cola
+  period: 2017
+  doc-type: 10k
+  doc: COCACOLA_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is the FY2017 return on assets (ROA) for Coca Cola? ROA is defined
+    as: FY2017 net income / (average total assets between FY2016 and FY2017). Round
+    your answer to two decimal places. Give a response to the question by relying
+    on the details shown in the balance sheet and the P&L statement.'
+
+  answer: '0.01'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Net income. This metric was located in the 10K as a single line item
+    named: NET INCOME ATTRIBUTABLE TO SHAREOWNERS OF THE COCA-COLA COMPANY.
+
+
+    Metric 2: Total assets. This metric was located in the 10K as a single line item
+    named: TOTAL ASSETS.'
+  page(s)-0based: 73
+  page(s): 74,76
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Return on Assets (RoA) percentage value that is in the range from 0.90% to 2.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0090 to 0.0200
+    (if the answer is a single number, assume that it is that calculated Return on Assets (RoA) metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_09724:
+  sector: Consumer Staples
+
+  company: Coca-Cola
+  period: 2021
+  doc-type: 10k
+  doc: COCACOLA_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Coca Cola's FY2021 COGS % margin? Calculate what was asked by
+    utilizing the line items clearly shown in the income statement.
+
+  answer: 39.7%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of goods sold.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Net Operating Revenues.'
+  page(s)-0based: 61
+  page(s): '62'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Cost of Goods Sold (COGS) Margin
+    percentage value that is in the range from 38.00% to 42.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.3800 to 0.4200
+    (if the answer is a single number, assume that it is that calculated Cost of Goods Sold (COGS) Margin metric value)
+
+
+financebench_id_06272:
+  sector: Consumer Staples
+
+  company: Coca-Cola
+  period: 2022
+  doc-type: 10k
+  doc: COCACOLA_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Coca Cola's FY2022 dividend payout ratio (using total cash dividends
+    paid and net income attributable to shareholders)? Round answer to two decimal
+    places. Answer the question asked by assuming you only have access to information
+    clearly displayed in the cash flow statement and the income statement.
+
+  answer: '0.8'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total cash dividends paid out. This metric was located in the 10K as
+    a single line item named: Dividends.
+
+
+    Metric 2: Net income. This metric was located in the 10K as a single line item
+    named: Net Income Attributable to Shareowners of The Coca-Cola Company.'
+  page(s)-0based: 62
+  page(s): 63,66
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Dividend Payout Ratio decimal value that is in the range from 0.7800 to 0.8200,
+    or, alternatively, a calculated percentage value that is in the range from 78.00% to 82.00%
+    (if the answer is a single number, assume that it is that calculated Dividend Payout Ratio metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_10130:
+  sector: Information Technology
+
+  company: Corning
+  period: 2020
+  doc-type: 10k
+  doc: CORNING_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'Based on the information provided primarily in the balance sheet and
+    the statement of income, what is FY2020 days payable outstanding (DPO) for Corning?
+    DPO is defined as: 365 * (average accounts payable between FY2019 and FY2020)
+    / (FY2020 COGS + change in inventory between FY2019 and FY2020). Round your answer
+    to two decimal places.'
+
+  answer: '63.86'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Accounts payable. This metric was located in the 10K as a single line
+    item named: Accounts payable.
+
+
+    Metric 2: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories, net (Note 6).
+
+
+    Metric 3: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.'
+  page(s)-0based: 69
+  page(s): 70,72
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 60.00 to 70.00
+    (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) decimal value)
+
+
+financebench_id_02981:
+  sector: Information Technology
+
+  company: Corning
+  period: 2021
+  doc-type: 10k
+  doc: CORNING_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: Taking into account the information outlined in the income statement,
+    what is the FY2019 - FY2021 3 year average unadjusted operating income % margin
+    for Corning? Answer in units of percents and round to one decimal place.
+
+  answer: 10.3%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating income.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Net sales.'
+  page(s)-0based: 64
+  page(s): '65'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Average Operating Income Margin percentage value that is in the range from 9.00% to 12.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0900 to 0.1200
+    (if the answer is a single number, assume that it is that calculated Average Operating Income Margin metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_01346:
+  sector: Information Technology
+
+  company: Corning
+  period: 2022
+  doc-type: 10k
+  doc: CORNING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg23
+  question: How much has the effective tax rate of Corning changed between FY2021
+    and FY2022?
+
+  answer: The effective tax rate of Corning has changed from 20% in FY2021 to 23%
+    in FY 2022.
+  justification: ''
+  page(s)-0based: 23
+  page(s): '24'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer says that Effective Tax Rate changed
+    from approximately 20.2% (or 20%) to approximately 22.9% (or 23%),
+    and/or that it increased by approximately 2.6, 2.7 or 3 percentage points
+    (or 2.6%, 2.7%, or 3%)
+
+  evaluator-unreliable: true
+
+
+financebench_id_00005:
+  sector: Information Technology
+
+  company: Corning
+  period: 2022
+  doc-type: 10k
+  doc: CORNING_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg24
+  question: Does Corning have positive working capital based on FY2022 data? If working
+    capital is not a useful or relevant metric for this company, then please state
+    that and explain why.
+
+  answer: Yes. Corning had a positive working capital amount of $831 million by FY
+    2022 close. This answer considers only operating current assets and current liabilities
+    that were clearly shown in the balance sheet.
+  justification: 'Trade accounts receivable, net of doubtful accounts+Inventories+Other
+    current assets-Accounts payable-Other accrued liabilities
+
+    1721+2904+1157-1804-3147'
+  page(s)-0based: 59
+  page(s): '60'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer affirms that Working Capital is/was positive,
+    proving so by a calculated Working Capital metric value that is positive
+
+
+financebench_id_04209:
+  sector: Consumer Staples
+
+  company: Costco
+  period: 2021
+  doc-type: 10k
+  doc: COSTCO_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: Using only the information within the balance sheet, how much total assets
+    did Costco have at the end of FY2021? Answer in USD millions.
+
+  answer: $59268.00
+  justification: 'The metric total assets was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: TOTAL ASSETS.'
+  page(s)-0based: 37
+  page(s): '38'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity equivalent to or approximately equal to
+    59268, 59268 million, 59.268 billion,
+    59300, 59300 million, 59.3 billion,
+    59000, 59000 million or 59 billion
+
+
+financebench_id_05915:
+  sector: Health Care
+
+  company: CVS Health
+  period: 2018
+  doc-type: 10k
+  doc: CVSHEALTH_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is the FY2018 fixed asset turnover ratio for CVS Health? Fixed asset
+    turnover ratio is defined as: FY2018 revenue / (average PP&E between FY2017 and
+    FY2018). Round your answer to two decimal places. Calculate what was asked by
+    utilizing the line items clearly shown in the P&L statement and the balance sheet.'
+
+  answer: '17.98'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total revenue. This metric was located in the 10K as a single line item
+    named: Total revenues.
+
+
+    Metric 2: Ppne, net. This metric was located in the 10K as a single line item
+    named: Property and equipment, net.'
+  page(s)-0based: 301
+  page(s): 302,304
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Fixed Asset Turnover Ratio decimal value that is in the range from 17.00 to 19.00
+    (if the answer is a single number, assume that it is that calculated Fixed Asset Turnover Ratio decimal value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_00790:
+  sector: Health Care
+
+  company: CVS Health
+  period: 2022
+  doc-type: 10k
+  doc: CVSHEALTH_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning)
+  domain-question-num: dg06
+  question: Is CVS Health a capital-intensive business based on FY2022 data?
+
+  answer: Yes, CVS Health requires an extensive asset base to operate, which is evident
+    from its ROA of only 1.82% in 2022 and 3.39% in 2021, though it should be noted
+    that a significant portion of this asset base is goodwill, and CVS's fixed assets/total
+    assets ratio is on the lower side of 5.6%.
+  justification: 'Property and equipment, net/Total Assets
+
+    12873/228275
+
+
+    ROA=Net Income/Total Assets
+
+    4165/228275
+
+    7898/232999'
+  page(s)-0based: 107
+  page(s): 108,110
+
+  category: 6-OTHER-ADVANCED
+  correctness: |-
+    the answer either:
+    - mentions that a calculated Return on Assets (RoA) metric value is quite low (which suggests capital intensity); or
+    - mentions that Fixed Assets form only a small proportion of Total Assets (which suggests the reverse)
+
+  evaluator-unreliable: true
+
+
+financebench_id_01107:
+  sector: Health Care
+
+  company: CVS Health
+  period: 2022
+  doc-type: 10k
+  doc: CVSHEALTH_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg11
+  question: Has CVS Health reported any materially important ongoing legal battles
+    from 2022, 2021 and 2020?
+
+  answer: "Yes, CVS Health has been involved in multiple ongoing legal battles. Some\
+    \ notable legal dispute areas for CVS are: (1) usual and customary pricing litigation:\
+    \ where it's claimed that CVS's retail pharmacies overcharged for prescription\
+    \ drugs; (2) PBM litigation and investigations: where it's claimed that rebate\
+    \ agreements between the drug manufacturers and PBMs caused inflated prices for\
+    \ certain drug products; and (3) controlled substances litigation: legal matters\
+    \ around opioids for which CVS has agreed to pay up to $4.3 billion to claimants\
+    \ in remediation and $625 million to attorneys and fees"
+  justification: ''
+  page(s)-0based: 172
+  page(s): 173,173,174
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that there have been material lawsuits / legal battles
+
+
+financebench_id_01244:
+  sector: Health Care
+
+  company: CVS Health
+  period: 2022
+  doc-type: 10k
+  doc: CVSHEALTH_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg18
+  question: Has CVS Health paid dividends to common shareholders in Q2 of FY2022?
+
+  answer: Yes, CVS paid a $ 0.55 dividend per share every quarter in FY2022
+  justification: ''
+  page(s)-0based: 67
+  page(s): '68'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that dividends have been / were paid
+
+
+financebench_id_00839:
+  sector: Consumer Discretionary
+
+  company: Foot Locker
+  period: 2022
+  doc-type: 8k
+  doc: FOOTLOCKER_2022_8K_dated_2022-08-19
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Does Foot Locker's new CEO have previous CEO experience in a similar company
+    to Footlocker?
+
+  answer: Yes. She was previous CEO of Ulta Beauty which means she had to manage a
+    large retail company that has brick and mortar + online business. So yes she was
+    a CEO in a similar company to Foot Locker before this.
+  justification: ''
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that Dillon has got experience in relevant and similar organizations and roles
+
+  evaluator-unreliable: true
+
+
+financebench_id_00822:
+  sector: Consumer Discretionary
+
+  company: Foot Locker
+  period: 2022
+  doc-type: 8k
+  doc: FOOTLOCKER_2022_8K_dated-2022-05-20
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Were there any board member nominees who had substantially more votes
+    against joining than the other nominees?
+
+  answer: Yes, his name is Richard A. Johnson
+  justification: Richard A. Johnson had roughly 16.1 million votes against him joining
+    whereas the maximum votes against joining among all other candidates was roughly
+    6.1 million.
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Johnson as receiving many votes against
+
+  evaluator-unreliable: true
+
+
+financebench_id_04103:
+  sector: Consumer Staples
+
+  company: General Mills
+  period: 2019
+  doc-type: 10k
+  doc: GENERALMILLS_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is the FY2019 cash conversion cycle (CCC) for General Mills? CCC
+    is defined as: DIO + DSO - DPO. DIO is defined as: 365 * (average inventory between
+    FY2018 and FY2019) / (FY2019 COGS). DSO is defined as: 365 * (average accounts
+    receivable between FY2018 and FY2019) / (FY2019 Revenue). DPO is defined as: 365
+    * (average accounts payable between FY2018 and FY2019) / (FY2019 COGS + change
+    in inventory between FY2018 and FY2019). Round your answer to two decimal places.
+    Address the question by using the line items and information shown within the
+    income statement and the balance sheet.'
+
+  answer: '-3.7'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Accounts payable. This metric was located in the 10K as a single line
+    item named: Accounts payable.
+
+
+    Metric 2: Accounts receivable, net. This metric was located in the 10K as a single
+    line item named: Receivables.
+
+
+    Metric 3: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.
+
+
+    Metric 4: Total revenue. This metric was located in the 10K as a single line item
+    named: Net sales.
+
+
+    Metric 5: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories.'
+  page(s)-0based: 52
+  page(s): 53,55
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Cash Conversion Cycle (CCC) metric value
+    that is NEGATIVE and in the range from -5.00 to -2.00, based on supporting calculated
+    Days Inventory Outstanding (DIO), Days Sales Outstanding (DSO) and Days Payable Outstanding (DPO) metric values
+  answer-inadequate: true
+
+
+financebench_id_03471:
+  sector: Consumer Staples
+
+  company: General Mills
+  period: 2020
+  doc-type: 10k
+  doc: GENERALMILLS_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: By drawing conclusions from the information stated only in the statement
+    of financial position, what is General Mills's FY2020 working capital ratio? Define
+    working capital ratio as total current assets divided by total current liabilities.
+    Round your answer to two decimal places.
+
+  answer: '0.68'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total current liabilities. This metric was located in the 10K as a single
+    line item named: Total current liabilities.
+
+
+    Metric 2: Total current assets. This metric was located in the 10K as a single
+    line item named: Total current assets.'
+  page(s)-0based: 49
+  page(s): '50'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Working Capital Ratio decimal value that is in the range from 0.6500 to 0.7000,
+    or, alternatively, a calculated percentage value that is in the range from 65.00% to 70.00%
+    (if the answer is a single number, assume that it is that calculated Working Capital Ratio metric value)
+
+
+financebench_id_04854:
+  sector: Consumer Staples
+
+  company: General Mills
+  period: 2020
+  doc-type: 10k
+  doc: GENERALMILLS_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'According to the information provided in the statement of cash flows,
+    what is the FY2020 free cash flow (FCF) for General Mills? FCF here is defined
+    as: (cash from operations - capex). Answer in USD millions.'
+
+  answer: $3215.00
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cash from operations. This metric was located in the 10K as a single
+    line item named: Net cash provided by operating activities.
+
+
+    Metric 2: Capital expenditures. This metric was located in the 10K as a single
+    line item named: Purchases of land, buildings, and equipment.'
+  page(s)-0based: 51
+  page(s): '52'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Free Cash Flows (FCF) metric value that is equivalent to or approximately equal to
+    3215.4, 3215.4 million, 3.2154 billion,
+    3215, 3215 million, 3.215 billion,
+    3200, 3200 million or 3.2 billion
+    (if the answer is a single number, assume that it is that calculated Free Cash Flows (FCF) metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_10136:
+  sector: Consumer Staples
+
+  company: General Mills
+  period: 2022
+  doc-type: 10k
+  doc: GENERALMILLS_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'We want to calculate a financial metric. Please help us compute it by
+    basing your answers off of the cash flow statement and the income statement. Here''s
+    the question: what is the FY2022 retention ratio (using total cash dividends paid
+    and net income attributable to shareholders) for General Mills? Round answer to
+    two decimal places.'
+
+  answer: '0.54'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total cash dividends paid out. This metric was located in the 10K as
+    a single line item named: Dividends paid.
+
+
+    Metric 2: Net income. This metric was located in the 10K as a single line item
+    named: Net earnings attributable to General Mills.'
+  page(s)-0based: 44
+  page(s): 45,49
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Retention Ratio decimal value that is in the range from 0.5000 to 0.6000,
+    or, alternatively, a calculated percentage value that is in the range from 50.00% to 60.00%
+    (if the answer is a single number, assume that it is that calculated Retention Ratio metric value)
+
+
+financebench_id_00956:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2022
+  doc-type: 10k
+  doc: JOHNSON_JOHNSON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning)
+  domain-question-num: dg05
+  question: Are JnJ's FY2022 financials that of a high growth company?
+
+  answer: No, JnJ's FY2022 financials are not of a high growth company as sales grew
+    by 1.3% in FY2022.
+  justification: ''
+  page(s)-0based: 27
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions low/slow Sales Revenue growth
+
+
+financebench_id_00669:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2022
+  doc-type: 10k
+  doc: JOHNSON_JOHNSON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Numerical
+    reasoning OR Logical reasoning
+  domain-question-num: dg16
+  question: What drove gross margin change as of FY2022 for JnJ? If gross margin is
+    not a useful metric for a company like this, then please state that and explain
+    why.
+
+  answer: 'For FY22, JnJ had changes in gross margin due to: One-time COVID-19 vaccine
+    manufacturing exit related costs, Currency impacts in the Pharmaceutical segment,
+    Commodity inflation in the MedTech and Consumer Health segments, partially offset
+    by Supply chain benefits in the Consumer Health segment.'
+  justification: Gross margin change is equivalent to the increase in cost of products
+    sold as a percent to sales.
+  page(s)-0based: 33
+
+  category: 5-EXPLAIN-FACTORS
+  correctness: |-
+    the answer mentions at least 2 of following:
+    - one-time COVID-19 vaccine manufacturing exit related costs;
+    - currency impacts in the Pharmaceutical segment;
+    - commodity inflation in the MedTech and Consumer Health segments; and/or
+    - supply chain benefits in the Consumer Health segment
+
+  evaluator-unreliable: true
+
+
+financebench_id_00711:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2022
+  doc-type: 10k
+  doc: JOHNSON_JOHNSON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg25
+  question: Roughly how many times has JnJ sold its inventory in FY2022? Calculate
+    inventory turnover ratio for FY2022; if conventional inventory management is not
+    meaningful for the company then state that and explain why.
+
+  answer: JnJ sold its inventory 2.7 times in FY2022.
+  justification: Inventory turnover ratio = Cost of products sold/average inventories
+    = 31,089/((12,483+10,387)/2) = 2.7
+  page(s)-0based: 45
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 2.00 to 3.00
+    (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_00651:  # TODO: retrieve growth rates
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2022
+  doc-type: Earnings
+  doc: JOHNSON_JOHNSON_2022Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Is growth in JnJ's adjusted EPS expected to accelerate in FY2023?
+
+  answer: No, rate of growth in adjusted EPS is expected to decelerate slightly from
+    3.6% in FY2022 to 3.5% in FY2023.
+  justification: FY2023 adjusted EPS growth of 3.5% is slightly lower than FY2022
+    adjusted EPS growth of 3.6%.
+  page(s)-0based: 0
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer mentions 3.5% and 3.6%,
+    or, alternatively, concludes that growth is NOT expected to accelerate
+
+  evaluator-unreliable: true
+
+
+financebench_id_01484:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2022
+  doc-type: Earnings
+  doc: JOHNSON_JOHNSON_2022Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: How did JnJ's US sales growth compare to international sales growth in
+    FY2022?
+
+  answer: US sales increased 3.0% vs international sales decline of 0.6%.
+  justification: ''
+  page(s)-0based: 1
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer mentions US sales increased and international sales decreased
+
+  evaluator-unreliable: true
+
+
+financebench_id_01488:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2023
+  doc-type: 8k
+  doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which business segment of JnJ will be treated as a discontinued operation
+    from August 30, 2023 onward?
+
+  answer: The Consumer Health business segment will be treated as a discontinued operation
+    from August 30, 2023 onward.
+  justification: ''
+  page(s)-0based: 3
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer identifies Consumer Health as discontinued
+
+
+financebench_id_01490:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2023
+  doc-type: 8k
+  doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What is the amount of the gain accruing to JnJ as a result of the separation
+    of its Consumer Health business segment, as of August 30, 2023?
+
+  answer: JnJ will make a gain of approximately $20 billion from the separation of
+    its Consumer Health business segment.
+  justification: ''
+  page(s)-0based: 3
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions 20 billion
+
+
+financebench_id_01491:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2023
+  doc-type: 8k
+  doc: JOHNSON_JOHNSON_2023_8K_dated-2023-08-30
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What is the amount of the cash proceeds that JnJ realised from the separation
+    of Kenvue (formerly Consumer Health business segment), as of August 30, 2023?
+
+  answer: JnJ realised $13.2 billion in cash proceeds from the separation of Kenvue.
+  justification: ''
+  page(s)-0based: 3
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions 13.2 billion, or, alternatively, approximately 13 billion
+
+
+financebench_id_01487:
+  sector: Health Care
+
+  company: Johnson & Johnson
+  period: 2023
+  doc-type: Earnings
+  doc: JOHNSON_JOHNSON_2023Q2_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Did JnJ's net earnings as a percent of sales increase in Q2 of FY2023
+    compared to Q2 of FY2022?
+
+  answer: Yes, net earnings as a percent of sales increased from 20% in Q2 of FY2022
+    to 20.1% in Q2 of FY2023.
+  justification: ''
+  page(s)-0based: 9
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer mentions 20.0% (or 20%) and 20.1%, or, alternatively, mentions a slight increase
+
+
+financebench_id_00299:
+  sector: Financials
+
+  company: JPMorgan
+  period: 2021
+  doc-type: 10q
+  doc: JPMORGAN_2021Q1_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which of JPM's business segments had the lowest net revenue in 2021 Q1?
+
+  answer: Corporate. Its net revenue was -$473 million.
+  justification: 14,605 > 12,517 > 4,077 > 2,393 > -473
+  page(s)-0based: 18
+  page(s): '19'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Corporate segment as having lowest Net Revenue
+
+
+financebench_id_02119:
+  sector: Financials
+
+  company: JPMorgan
+  period: 2021
+  doc-type: 10q
+  doc: JPMORGAN_2021Q1_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: If JPM went bankrupted by the end by 2021 Q1 and liquidated all of its
+    assets to pay its shareholders, how much could each shareholder get?
+
+  answer: They could receive $66.56 per share.
+  justification: ''
+  page(s)-0based: 5
+  page(s): '6'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is in the range from 60.00 to 70.00
+
+  evaluator-unreliable: true
+
+
+financebench_id_00206:
+  sector: Financials
+
+  company: JPMorgan
+  period: 2022
+  doc-type: 10k
+  doc: JPMORGAN_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical
+    reasoning
+  domain-question-num: dg03
+  question: Are JPM's gross margins historically consistent (not fluctuating more
+    than roughly 2% each year)? If gross margins are not a relevant metric for a company
+    like this, then please state that and explain why.
+
+  answer: Since JPM is a financial institution, gross margin is not a relevant metric.
+  justification: ''
+  page(s)-0based: 2
+  page(s): '3'
+
+  category: 6-OTHER-ADVANCED
+  correctness: >-
+    the answer argues that Gross Margin is not a very relevant/useful metric for this business model and/or industry,
+    or, alternatively, that performance in this business model and/or industry is usually not judged through Gross Margin
+
+  evaluator-unreliable: true
+
+
+financebench_id_00394:
+  sector: Financials
+
+  company: JPMorgan
+  period: 2022
+  doc-type: 10q
+  doc: JPMORGAN_2022Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: In 2022 Q2, which of JPM's business segments had the highest net income?
+
+  answer: Corporate & Investment Bank. Its net income was $3725 million.
+  justification: 3725 > 3100 > 1004 > 994 > -174
+  page(s)-0based: 20
+  page(s): '21'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Corporate & Investment Bank segment as having highest Net Income
+
+
+financebench_id_02049:
+  sector: Financials
+
+  company: JPMorgan
+  period: 2023
+  doc-type: 10q
+  doc: JPMORGAN_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Looking at VaR, did the risk that JPM faced in the second fiscal quarter
+    of 2023 decrease compared to the same period in the prior year?
+
+  answer: Yes. It decreased.
+  justification: ''
+  page(s)-0based: 84
+  page(s): '85'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that VaR decreased
+
+
+financebench_id_10499:
+  sector: Consumer Staples
+
+  company: Kraft Heinz
+  period: 2019
+  doc-type: 10k
+  doc: KRAFTHEINZ_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is Kraft Heinz''s FY2019 inventory turnover ratio? Inventory turnover
+    ratio is defined as: (FY2019 COGS) / (average inventory between FY2018 and FY2019).
+    Round your answer to two decimal places. Please base your judgments on the information
+    provided primarily in the balance sheet and the P&L statement.'
+
+  answer: '6.25'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of products sold.
+
+
+    Metric 2: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories.'
+  page(s)-0based: 49
+  page(s): 50,52
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 6.00 to 6.50
+    (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value)
+
+
+financebench_id_04412:
+  sector: Industrials
+
+  company: Lockheed Martin
+  period: 2020
+  doc-type: 10k
+  doc: LOCKHEEDMARTIN_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'We need to calculate a reasonable approximation (or exact number if possible)
+    of a financial metric. Basing your judgment by information plainly provided in
+    the balance sheet and the P&L statement, what is Lockheed Martin''s FY2020 asset
+    turnover ratio? Asset turnover ratio is defined as: FY2020 revenue / (average
+    total assets between FY2019 and FY2020). Round your answer to two decimal places.'
+
+  answer: '1.33'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total revenue. This metric was located in the 10K as a single line item
+    named: Total net sales.
+
+
+    Metric 2: Total assets. This metric was located in the 10K as a single line item
+    named: Total assets.'
+  page(s)-0based: 66
+  page(s): 67,69
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Asset Turnover Ratio decimal value that is in the range from 1.30 to 1.40
+    (if the answer is a single number, assume that it is that calculated Asset Turnover Ratio decimal value)
+
+
+financebench_id_03031:
+  sector: Industrials
+
+  company: Lockheed Martin
+  period: 2021
+  doc-type: 10k
+  doc: LOCKHEEDMARTIN_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Lockheed Martin's FY2021 net working capital? Define net working
+    capital as total current assets less total current liabilities. Answer in USD
+    millions. Respond to the question by assuming the perspective of an investment
+    analyst who can only use the details shown within the balance sheet.
+
+  answer: $5818.00
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Total current liabilities. This metric was located in the 10K as a single
+    line item named: Total current liabilities.
+
+
+    Metric 2: Total current assets. This metric was located in the 10K as a single
+    line item named: Total current assets.'
+  page(s)-0based: 67
+  page(s): '68'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Net Working Capital metric value that is equivalent to or approximately equal to
+    5818, 5818 million, 5.818 billion,
+    5800, 5800 million or 5.8 billion
+    (if the answer is a single number, assume that it is that calculated Net Working Capital metric value)
+
+
+financebench_id_03718:
+  sector: Industrials
+
+  company: Lockheed Martin
+  period: 2022
+  doc-type: 10k
+  doc: LOCKHEEDMARTIN_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is Lockheed Martin's 2 year total revenue CAGR from FY2020 to FY2022
+    (in units of percents and round to one decimal place)? Provide a response to the
+    question by primarily using the statement of income.
+
+  answer: 0.4%
+  justification: 'The metric total revenue was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: Total net sales. The final step
+    was to execute the desired CAGR calculation on total revenue.'
+  page(s)-0based: 62
+  page(s): '63'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer contains a calculated CAGR percentage value that is in the range from 0.400% to 0.500%
+    (if the answer is a single number, assume that it is that calculated CAGR percentage value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_04171:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2018
+  doc-type: 10k
+  doc: MGMRESORTS_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: Basing your judgments off of the balance sheet, what is the year end FY2018
+    amount of accounts payable for MGM Resorts? Answer in USD millions.
+
+  answer: $303.00
+  justification: 'The metric accounts payable was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: Accounts payable.'
+  page(s)-0based: 56
+  page(s): '57'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is equivalent to or approximately equal to
+    302.6, 302.6 million, 0.3026 billion,
+    303, 303 million, 0.303 billion,
+    300, 300 million or 0.3 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_03849:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2020
+  doc-type: 10k
+  doc: MGMRESORTS_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2018 - FY2020 3 year average of capex as a % of revenue
+    for MGM Resorts? Answer in units of percents and round to one decimal place. Please
+    utilize information provided primarily within the statement of cash flows and
+    the statement of income.
+
+  answer: 7.9%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Capital expenditures. This metric was located in the 10K as a single
+    line item named: Capital expenditures, net of construction payable.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: [blank line item referring to total revenue].'
+  page(s)-0based: 64
+  page(s): 65,67
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated metric percentage value that is in the range from 7.50% to 8.50%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0750 to 0.0850
+    (if the answer is a single number, assume that it is that calculated metric value)
+
+
+financebench_id_01254:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2022
+  doc-type: 10k
+  doc: MGMRESORTS_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg18
+  question: Has MGM Resorts paid dividends to common shareholders in FY2022?
+
+  answer: Yes. MGM maintained 0.01$ per share annual dividend through out FY 2022.
+  justification: ''
+  page(s)-0based: 31
+  page(s): '32'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer affirms that dividends have been / were paid
+
+  evaluator-unreliable: true
+
+
+financebench_id_00382:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2022
+  doc-type: Earnings
+  doc: MGMRESORTS_2022Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which region had the Highest EBITDAR Contribution for MGM during FY2022?
+
+  answer: Las Vegas resorts contributed ~90% of company level EBITDAR during FY2022.
+  justification: 3142308/3497254
+  page(s)-0based: 12
+  page(s): '13'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Las Vegas resorts as having highest EBITDAR
+
+
+financebench_id_01911:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2022
+  doc-type: Earnings
+  doc: MGMRESORTS_2022Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What was MGM's interest coverage ratio using FY2022 Adjusted EBIT as the
+    numerator and annual Interest Expense as the denominator?
+
+  answer: As adjusted EBIT is negative, coverage ratio is zero
+  justification: ''
+  page(s)-0based: 13
+  page(s): '14'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Interest Coverage Ratio metric value,
+    or, alternatively, concludes that Interest Coverage Ratio is zero
+  answer-inadequate: true
+
+
+financebench_id_01912:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2022
+  doc-type: Earnings
+  doc: MGMRESORTS_2022Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which region had the worst topline performance for MGM during FY2022?
+
+  answer: MGM China experienced the worst topline performance amongst the other regions
+    presented. Its revenue declined 44% in FY2022 whereas the other regions presented
+    increased their revenues.
+  justification: ''
+  page(s)-0based: 2
+  page(s): 3,4,4
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies MGM China as having worst top-line Revenue performance
+
+
+financebench_id_00407:
+  sector: Consumer Discretionary
+
+  company: MGM Resorts
+  period: 2023
+  doc-type: 10q
+  doc: MGMRESORTS_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Which type of debt received the largest investment among the short term
+    investments for MGM in H1 FY2023?
+
+  answer: the biggest short term investment is in corporate bonds (almost 82% of the
+    total investment)
+  justification: 416420/509921
+  page(s)-0based: 10
+  page(s): '11'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies corporate bonds as having received largest short-term investment
+
+
+financebench_id_04700:
+  sector: Information Technology
+
+  company: Microsoft
+  period: 2016
+  doc-type: 10k
+  doc: MICROSOFT_2016_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is the FY2016 COGS for Microsoft? Please state answer in USD millions.
+    Provide a response to the question by primarily using the statement of income.
+
+  answer: $32780.00
+  justification: 'The metric cost of goods sold was directly extracted from the company
+    10K. The line item name, as seen in the 10K, was: Total cost of revenue.'
+  page(s)-0based: 51
+  page(s): '52'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is equivalent to or approximately equal to
+    32780, 32780 million, 32.78 billion,
+    32800, 32800 million, 32.8 billion,
+    33000, 33000 million or 33 billion
+
+
+financebench_id_00552:
+  sector: Information Technology
+
+  company: Microsoft
+  period: 2023
+  doc-type: 10k
+  doc: MICROSOFT_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg22
+  question: Has Microsoft increased its debt on balance sheet between FY2023 and the
+    FY2022 period?
+  answer: No. Microsoft decreased its debt by $2.5bn in FY 2023 vs FY 2022.
+  justification: 'Current portion of long-term debt+Long-term debt
+
+    5247+41990
+
+    2749+47032'
+  page(s)-0based: 59
+  page(s): '60'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains calculated Total Debt values for 2022 and 2023, and concludes that Total Debt decreased
+  answer-inadequate: true
+
+
+financebench_id_04458:
+  sector: Communication Services
+
+  company: Netflix
+  period: 2015
+  doc-type: 10k
+  doc: NETFLIX_2015_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'We want to calculate a financial metric. Please help us compute it by
+    basing your answers off of the statement of income and the statement of cash flows.
+    Here''s the question: what is the FY2015 unadjusted EBITDA % margin for Netflix?
+    Calculate unadjusted EBITDA using unadjusted operating income and D&A (from cash
+    flow statement).'
+
+  answer: 5.4%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization of property, equipment
+    and intangibles.
+
+
+    Metric 2: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating income.
+
+
+    Metric 3: Total revenue. This metric was located in the 10K as a single line item
+    named: Revenues.'
+  page(s)-0based: 39
+  page(s): 40,42
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated EBITDA Margin percentage value that is in the range from 5.00% to 5.50%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0500 to 0.0550,
+    assuming that EBITDA = "Operating Income" + "Depreciation & Amortization of Property, Equipment & Intangibles"
+    (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value)
+
+
+financebench_id_03282:
+  sector: Communication Services
+
+  company: Netflix
+  period: 2017
+  doc-type: 10k
+  doc: NETFLIX_2017_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is Netflix's year end FY2017 total current liabilities (in USD millions)?
+    Base your judgments on the information provided primarily in the balance sheet.
+
+  answer: $5466.00
+  justification: 'The metric total current liabilities was directly extracted from
+    the company 10K. The line item name, as seen in the 10K, was: Total current liabilities.'
+  page(s)-0based: 44
+  page(s): '45'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is equivalent to or approximately equal to
+    5466.3, 5466.3 million, 5.4663 billion,
+    5466, 5466 million, 5.466 billion,
+    5500, 5500 million or 5.5 billion
+
+  evaluator-unreliable: true
+
+
+financebench_id_04302:
+  sector: Consumer Discretionary
+
+  company: Nike
+  period: 2018
+  doc-type: 10k
+  doc: NIKE_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: We need to calculate a reasonable approximation (or exact number if possible)
+    of a financial metric. Basing your judgment by information plainly provided in
+    the statement of income, what is Nike's three year average of cost of goods sold
+    as a % of revenue from FY2016 to FY2018? Answer in units of percents and round
+    to one decimal place.
+
+  answer: 55.1%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Revenues.'
+  page(s)-0based: 45
+  page(s): '46'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated metric percentage value that is in the range from 50.00% to 60.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.5000 to 0.6000
+    (if the answer is a single number, assume that it is that calculated metric value)
+
+
+financebench_id_03531:
+  sector: Consumer Discretionary
+
+  company: Nike
+  period: 2019
+  doc-type: 10k
+  doc: NIKE_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: According to the details clearly outlined within the balance sheet, how
+    much total current assets did Nike have at the end of FY2019? Answer in USD millions.
+
+  answer: $16525.00
+  justification: 'The metric total current assets was directly extracted from the
+    company 10K. The line item name, as seen in the 10K, was: Total current assets.'
+  page(s)-0based: 53
+  page(s): '54'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is equivalent to or approximately equal to
+    16525, 16525 million, 16.525 billion,
+    16500, 16500 million or 16.5 billion
+
+
+financebench_id_04080:
+  sector: Consumer Discretionary
+
+  company: Nike
+  period: 2021
+  doc-type: 10k
+  doc: NIKE_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'When primarily referencing the income statement and the statement of
+    financial position, what is the FY2021 inventory turnover ratio for Nike? Inventory
+    turnover ratio is defined as: (FY2021 COGS) / (average inventory between FY2020
+    and FY2021). Round your answer to two decimal places.'
+
+  answer: '3.46'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.
+
+
+    Metric 2: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories.'
+  page(s)-0based: 58
+  page(s): 59,61
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Inventory Turnover Ratio decimal value that is in the range from 3.00 to 4.00
+    (if the answer is a single number, assume that it is that calculated Inventory Turnover Ratio decimal value)
+
+
+financebench_id_01163:
+  sector: Consumer Discretionary
+
+  company: Nike
+  period: 2023
+  doc-type: 10k
+  doc: NIKE_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg19
+  question: Among operations, investing, and financing activities, which brought in
+    the most (or lost the least) cash flow for Nike in FY2023?
+
+  answer: Among the three, cash flow from operations was the highest for Nike in FY2023.
+  justification: ''
+  page(s)-0based: 61
+  page(s): '62'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Operations / Operating Cash Flows as bringing in most cash
+
+
+financebench_id_00080:
+  sector: Financials
+
+  company: Paypal
+  period: 2022
+  doc-type: 10k
+  doc: PAYPAL_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning OR Logical reasoning
+  domain-question-num: dg24
+  question: Does Paypal have positive working capital based on FY2022 data? If working
+    capital is not a useful or relevant metric for this company, then please state
+    that and explain why.
+
+  answer: Yes. Paypal has a positive working capital of $ 1.6Bn as of FY2022 end.
+  justification: 'Accounts receivable, net+Loans and interest receivable, net of allowances
+    +Funds receivable and customer accounts+Prepaid expenses and other current assets-Accounts
+    payable-Funds payable and amounts due to customers-Accrued expenses and other
+    current liabilities -Income taxes payable
+
+    963+7431+36357+1898-126-40107-4055-813'
+  page(s)-0based: 60
+  page(s): '61'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer affirms that Working Capital is/was positive,
+    proving so by a calculated Working Capital metric value that is positive
+
+
+financebench_id_04980:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2021
+  doc-type: 10k
+  doc: PEPSICO_2021_10K
+
+  question-type: metrics-generated
+  question-reasoning: Information extraction
+  domain-question-num: ''
+  question: What is the FY2021 capital expenditure amount (in USD billions) for PepsiCo?
+    Respond to the question by assuming the perspective of an investment analyst who
+    can only use the details shown within the statement of cash flows.
+
+  answer: $4.60
+  justification: 'The metric capital expenditures was directly extracted from the
+    company 10K. The line item name, as seen in the 10K, was: Capital spending.'
+  page(s)-0based: 62
+  page(s): '63'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer contains a quantity that is equivalent to or approximately equal to
+    4.625, 4.625 billion, 4625 million,
+    4.6, 4.6 billion or 4600 million
+
+
+financebench_id_01009:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2022
+  doc-type: 10k
+  doc: PEPSICO_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg08
+  question: What are the geographies that Pepsico primarily operates in as of FY2022?
+
+  answer: 'As of FY2022, Pepsico primarily operates in the following geographies:
+    North America, Latin America, Europe, Africa, Middle East, South Asia, Asia Pacific,
+    Australia, New Zealand and China.'
+  justification: ''
+  page(s)-0based: 3
+  page(s): 4, 5
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions at least 3 of following geographies:
+    - North America, which includes United States and Canada;
+    - Latin America (LatAm);
+    - Europe;
+    - Africa, Middle East and South Asia (AMESA); and
+    - Asia Pacific, Australia and New Zealand and China (APAC)
+
+
+financebench_id_00735:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2022
+  doc-type: 10k
+  doc: PEPSICO_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg11
+  question: Has Pepsico reported any materially important ongoing legal battles from
+    FY2022 and FY2021?
+
+  answer: No, Pepsico is not involved in material legal battles.
+  justification: Management believes the final outcome of legal proceedings will not
+    have a material adverse outcome.
+  page(s)-0based: 25
+  page(s): '26'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer says that there have NOT been material lawsuits / legal battles,
+    or, alternatively, that lawsuits / legal battles are unlikely to have materially adverse outcomes
+
+  evaluator-unreliable: true
+
+
+financebench_id_01328:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2022
+  doc-type: 10k
+  doc: PEPSICO_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg21
+  question: What is the quantity of restructuring costs directly outlined in Pepsico's
+    income statements for FY2022? If restructuring costs are not explicitly outlined
+    then state 0.
+
+  answer: Pepsico's restructuring costs in FY2022 amounted to $411 million .
+  justification: ''
+  page(s)-0based: 77
+  page(s): '78'
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer either:
+    - mentions a quantity that is equivalent to or approximately equal to 411 million; or
+    - states 0, zero, and/or that restructuring costs are not explicitly reported
+  answer-inadequate: true
+
+
+financebench_id_03620:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2022
+  doc-type: 10k
+  doc: PEPSICO_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2022 unadjusted EBITDA less capex for PepsiCo? Define unadjusted
+    EBITDA as unadjusted operating income + depreciation and amortization [from cash
+    flow statement]. Answer in USD millions. Respond to the question by assuming the
+    perspective of an investment analyst who can only use the details shown within
+    the statement of cash flows and the income statement.
+
+  answer: $9068.00
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization.
+
+
+    Metric 2: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating Profit.
+
+
+    Metric 3: Capital expenditures. This metric was located in the 10K as a single
+    line item named: Capital spending.'
+  page(s)-0based: 61
+  page(s): 62,64
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer contains a calculated metric value that is either:
+    - in the range from 8500 to 9500;
+    - in the range from 8500 million to 9500 million;
+    - in the range from 8.5 billion to 9.5 billion; or
+    - stated as approximately 9000 million or 9 billion
+    (if the answer is a single number, assume that it is that calculated metric value)
+
+  evaluator-unreliable: true
+
+
+financebench_id_04481:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2022
+  doc-type: 10k
+  doc: PEPSICO_2022_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2022 unadjusted EBITDA % margin for PepsiCo? Calculate unadjusted
+    EBITDA using unadjusted operating income and D&A (from cash flow statement). Give
+    a response to the question by relying on the details shown in the statement of
+    cash flows and the P&L statement.
+
+  answer: 16.5%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization.
+
+
+    Metric 2: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating Profit.
+
+
+    Metric 3: Total revenue. This metric was located in the 10K as a single line item
+    named: Net Revenue.'
+  page(s)-0based: 61
+  page(s): 62,64
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer contains a calculated EBITDA Margin percentage value that is in the range from 16.00% to 17.00%,
+    or, alternatively, a calculated decimal value that is in the range from 0.1600 to 0.1700
+    (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value)
+
+
+financebench_id_01482:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2023
+  doc-type: 8k
+  doc: PEPSICO_2023_8K_dated-2023-05-05
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: At the Pepsico AGM held on May 3, 2023, what was the outcome of the shareholder
+    vote on the shareholder proposal for a congruency report by Pepsico on net-zero
+    emissions policies?
+
+  answer: The shareholder proposal for a congruency report by Pepsico on net-zero
+    emissions policies was defeated.
+  justification: ''
+  page(s)-0based: 3
+  page(s): '4'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer says proposal related to Net-Zero Emissions was defeated / not successful
+
+
+financebench_id_00705:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2023
+  doc-type: 8k
+  doc: PEPSICO_2023_8K_dated-2023-05-30
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: By how much did Pepsico increase its unsecured five year revolving credit
+    agreement on May 26, 2023?
+
+  answer: $400,000,000 increase.
+  justification: Increase in five year unsecured revolving credit agreement = May
+    26, 2023, five year unsecured revolving credit agreement amount of $4,200,000,000
+    - May 27, 2022, five year unsecured revolving credit agreement amount of $3,800,000,000
+    = $400,000,000
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer contains a calculated change quantity that is equivalent to or approximately equal to
+    400,000,000, 400 million or 0.4 billion
+    (if the answer is a single number, assume that it is that calculated change amount)
+
+
+financebench_id_00882:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2023
+  doc-type: 8k
+  doc: PEPSICO_2023_8K_dated-2023-05-30
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: As of May 26, 2023, what is the total amount Pepsico may borrow under
+    its unsecured revolving credit agreements?
+
+  answer: Total amount Pepsico may borrow under unsecured revolving credit agreements
+    = $8,400,000,000.
+  justification: Total amount that may be borrowed under unsecured revolving credit
+    agreements = 2023, 364 day unsecured revolving credit agreement amount of $4,200,000,000
+    + 2023, five year unsecured revolving credit agreement amount of $4,200,000,000
+    = $8,400,000,000.
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer either (or both):
+    - mentions two separate quantities each equal to 4,200,000,000, 4200 million or 4.2 billion; and/or
+    - contains a calculated total quantity that is greater than or equal to
+      8,400,000,000, 8400 million or 8.4 billion
+    (if the answer is a single number, assume that it is that latter calculated total amount)
+
+  evaluator-unreliable: true
+
+
+financebench_id_01474:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2023
+  doc-type: Earnings
+  doc: PEPSICO_2023Q1_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: As of FY2023Q1, why did Pepsico raise full year guidance for FY2023?
+
+  answer: Pepsico experienced a strong start to FY2023.
+  justification: ''
+  page(s)-0based: 0
+  page(s): '1'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions strong business performance
+
+
+financebench_id_01476:
+  sector: Consumer Staples
+
+  company: PepsiCo
+  period: 2023
+  doc-type: Earnings
+  doc: PEPSICO_2023Q1_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: As of FY2023Q1, by how many percentage points did Pepsico raise full year
+    guidance in respect of core constant currency EPS growth?
+
+  answer: Pepsico raised full year guidance in respect of core constant currency EPS
+    growth by 1 percentage point.
+  justification: ''
+  page(s)-0based: 0
+  page(s): '1'
+
+  category: 2-CALC-CHANGE
+  correctness: >-
+    the answer mentions growth guidance raised from 8% to 9%,
+    and/or growth guidance raised by 1 percentage point or 1%
+
+  evaluator-unreliable: true
+
+
+financebench_id_00302:
+  sector: Health Care
+
+  company: Pfizer
+  period: 2021
+  doc-type: 10k
+  doc: PFIZER_2021_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Did Pfizer grow its PPNE between FY20 and FY21?
+
+  answer: Yes, change in PPNE was positive year over year
+  justification: 14882 - 13745 > 0
+  page(s)-0based: 58
+  page(s): '59'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer concludes that Property, Plant & Equipment (PP&E or PPNE) increased
+
+  evaluator-unreliable: true
+
+
+financebench_id_00702:
+  sector: Health Care
+
+  company: Pfizer
+  period: 2021
+  doc-type: 10k
+  doc: PFIZER_2021_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Were there any potential events that are not in Pfizer's standard business
+    operations that substantially increased net income in 2019?
+
+  answer: Yes, the gain on completion of Consumer Healthcare JV Transaction
+  justification: Income statement shows the gain on completion of Consumer Healthcare
+    JV transaction occurred in FY19. In FY21, this event did not affect the net income
+    at all due to the seemingly one time nature of the line item
+  page(s)-0based: 56
+  page(s): '57'
+
+  category: 5-EXPLAIN-FACTORS
+  correctness: >-
+    the answer mentions Consumer Healthcare JV transaction
+
+
+financebench_id_02416:  # note: Therachon is mentioned on separate following page
+  sector: Health Care
+
+  company: Pfizer
+  period: 2021
+  doc-type: 10k
+  doc: PFIZER_2021_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What are three main companies acquired by Pfizer mentioned in this 10K
+    report?
+
+  answer: Trillium, Array, and Therachon
+  justification: ''
+  page(s)-0based: 69
+  page(s): 70, 71
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions Trillium and Array
+
+
+financebench_id_00283:
+  sector: Health Care
+
+  company: Pfizer
+  period: 2023
+  doc-type: 10q
+  doc: Pfizer_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: How much does Pfizer expect to pay to spin off Upjohn in the future in
+    USD million?
+
+  answer: '77.78'
+  justification: '10% cost is remaining amount in the future. Calculation: 700/9 is
+    10% of the cost remaining'
+  page(s)-0based: 40
+  page(s): '41'
+
+  category: 6-OTHER-ADVANCED
+  correctness: >-
+    the answer mentions 700 million and 90%
+
+  evaluator-unreliable: true
+
+
+financebench_id_00724:
+  sector: Health Care
+
+  company: Pfizer
+  period: 2023
+  doc-type: 10q
+  doc: Pfizer_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: For Pfizer, which geographic region had the biggest drop in Q22023 year
+    over year revenues (on a percentage basis)?
+
+  answer: Developed Rest of the World
+  justification: It's plainly stated in table format the year over year revenue changes
+    for each of the regions
+  page(s)-0based: 37
+  page(s): '38'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Developed Rest of World as having worst percentage/relative decline
+
+
+financebench_id_02419:  # tricky: Upjohn spin-off started in 2020 but would complete in 2023
+  sector: Health Care
+
+  company: Pfizer
+  period: 2023
+  doc-type: 10q
+  doc: Pfizer_2023Q2_10Q
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: As of Q2'2023, is Pfizer spinning off any large business segments?
+
+  answer: Yes, it's spinning off Upjohn.
+  justification: ''
+  page(s)-0based: 40
+  page(s): '41'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions Upjohn
+
+  evaluator-unreliable: true
+
+
+financebench_id_00746:
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: 10k
+  doc: ULTABEAUTY_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg04
+  question: Which debt securities are registered to trade on a national securities
+    exchange under Ulta Beauty's name as of FY2023?
+
+  answer: There are none
+  justification: No debt securities listed under securities registered pursuant to
+    Section 12(b) of the Act.
+  page(s)-0based: 0
+  page(s): '1'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer concludes that there are no debt securities traded,
+    or, alternatively, that no such debt securities are explicitly reported
+
+
+financebench_id_00521:
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: 10k
+  doc: ULTABEAUTY_2023_10K
+
+  question-type: domain-relevant
+  question-reasoning: Information extraction
+  domain-question-num: dg10
+  question: What are major acquisitions that Ulta Beauty has done in FY2023 and FY2022?
+
+  answer: Ulta Beauty did not make any acquisitions in FY2023 and FY2022.
+  justification: Consolidated statement of cash flows reflects - for Acquisitions,
+    net of cash acquired in FY2023 and FY2022.
+  page(s)-0based: 56
+  page(s): '57'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer concludes that there are no major acquisitions,
+    or, alternatively, that no such major acquisitions are explicitly reported
+
+
+financebench_id_00601:
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: Earnings
+  doc: ULTABEAUTY_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What drove the reduction in SG&A expense as a percent of net sales in
+    FY2023?
+
+  answer: Lower marketing expenses and leverage of incentive compensation due to higher
+    sales. The answer here assumes FY2023 refers to the 12 months ended on January
+    28, 2023 (although the company refers to this period as its fiscal 2022).
+  justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022.
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions marketing expenses and incentive compensation
+  answer-inadequate: true
+
+
+financebench_id_00603:
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: Earnings
+  doc: ULTABEAUTY_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What drove the increase in Ulta Beauty's merchandise inventories balance
+    at end of FY2023?
+
+  answer: Increase in Merchandise inventories balance was driven by the opening of
+    47 new stores. The answer here assumes FY2023 refers to the 12 months ended on
+    January 28, 2023 (although the company refers to this period as its fiscal 2022).
+  justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022.
+  page(s)-0based: 2
+  page(s): '2'
+
+  category: 0-RETRIEVE
+  correctness: >-
+    the answer mentions new stores
+
+
+financebench_id_00605:
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: Earnings
+  doc: ULTABEAUTY_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: What percent of Ulta Beauty's total spend on stock repurchases for FY
+    2023 occurred in Q4 of FY2023?
+
+  answer: 36%. The answer here assumes FY2023 refers to the 12 months ended on January
+    28, 2023 (although the company refers to this period as its fiscal 2022).
+  justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022. Percent spent in Q4 of
+    FY2023 = Amount spent in Q4 of FY2023/Total amount spent in FY2023*100 =$328.1
+    million /$900 million * 100 = 36%
+  page(s)-0based: 2
+  page(s): '3'
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated percentage value that is in the range from 30% to 40%
+    (if the answer is a single number, assume that it is that calculated percentage value)
+
+
+financebench_id_00606:  # tricky: highly implicit wordings
+  sector: Consumer Discretionary
+
+  company: Ulta Beauty
+  period: 2023
+  doc-type: Earnings
+  doc: ULTABEAUTY_2023Q4_EARNINGS
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Did Ulta Beauty's wages expense as a percent of net sales increase or
+    decrease in FY2023?
+
+  answer: Wages expense as a percent of net sales increased in FY2023. The answer
+    here assumes FY2023 refers to the 12 months ended on January 28, 2023 (although
+    the company refers to this period as its fiscal 2022).
+  justification: Fiscal 2022 = FY2023. Fiscal 2021 = FY2022. Store payroll and benefits
+    = wages. Store payroll and benefits offsets reduction in SG&A percent of net sales
+    in FY2023.
+  page(s)-0based: 1
+  page(s): '2'
+
+  category: 6-OTHER-ADVANCED
+  correctness: >-
+    the answer concludes that Wages as percent of Net Sales increased
+
+
+financebench_id_00859:
+  sector: Communication Services
+
+  company: Verizon
+  period: 2021
+  doc-type: 10k
+  doc: VERIZON_2021_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: Among all of the derivative instruments that Verizon used to manage the
+    exposure to fluctuations of foreign currencies exchange rates or interest rates,
+    which one had the highest notional value in FY 2021?
+
+  answer: Cross currency swaps. Its notional value was $32,502 million.
+  justification: The derivative instruments used to manage the exposure were interest
+    rate swaps, cross currency swaps, forward starting interest rate swaps, and foreign
+    exchange forwards. 32502 > 19779 > 1000 > 932
+  page(s)-0based: 84
+  page(s): '85'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer identifies Cross Currency Swaps as having highest notional value
+
+
+financebench_id_02024:
+  sector: Communication Services
+
+  company: Verizon
+  period: 2021
+  doc-type: 10k
+  doc: VERIZON_2021_10K
+
+  question-type: novel-generated
+  question-reasoning: ''
+  domain-question-num: ''
+  question: As of FY 2021, how much did Verizon expect to pay for its retirees in
+    2024?
+
+  answer: The estimated pension benefits were $1097 million, and the estimated health
+    care and life insurance benefits were $862 million.
+  justification: ''
+  page(s)-0based: 62
+  page(s): 63, 94
+
+  category: 0-RETRIEVE
+  correctness: |-
+    the answer mentions at least 1 of following:
+    - amount of 1,097 million, or 1.1 billion, or approximately equivalent amount (explicitly or implicitly for "Pension (Benefits)");
+    - amount of 862 million, or approximately equivalent amount (explicitly or implicitly for "Health Care & Life (Insurance)"); or
+    - total amount of 1,959 million, or 1.96 billion, or 2.0 billion, or an approximately equivalent amount
+
+
+financebench_id_00216:
+  sector: Communication Services
+
+  company: Verizon
+  period: 2022
+  doc-type: 10k
+  doc: VERIZON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning) OR Logical
+    reasoning
+  domain-question-num: dg01
+  question: Does Verizon have a reasonably healthy liquidity profile based on its
+    quick ratio for FY 2022? If the quick ratio is not relevant to measure liquidity,
+    please state that and explain why.
+
+  answer: No. The quick ratio was approximately 0.54 for Verizon. It indicated that
+    Verizon does not have a healthy liquidity profile.
+  justification: Quick ratio = (current assets - inventories - prepaid expenses) /
+    current liabilities = (37857 - 2388 - 8358) / 50171 = 0.5403719
+  page(s)-0based: 55
+  page(s): '56'
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer contains a calculated Quick Ratio decimal value that is in the range from 0.40 to 0.80,
+    or, alternatively, a calculated percentage value that is in the range from 40% to 80%
+
+
+financebench_id_00215:
+  sector: Communication Services
+
+  company: Verizon
+  period: 2022
+  doc-type: 10k
+  doc: VERIZON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Logical reasoning (based on numerical reasoning)
+  domain-question-num: dg06
+  question: Is Verizon a capital intensive business based on FY 2022 data?
+
+  answer: Yes. Verizon's capital intensity ratio was approximately 2.774729. This
+    means that it took approximately $2.77 of assets to generate $1 of revenue and
+    thus, Verizon can be considered capital intensive.
+  justification: capital intensity ratio = total asset / revenue = 379680/ 136835
+    = 2.774729, which is relatively high
+  page(s)-0based: 55
+  page(s): 56, 23
+
+  category: 4-CALC-AND-JUDGE
+  correctness: >-
+    the answer opines that Verizon's business is capital-intensive, and justifies such opinion with a calculated ratio
+
+  evaluator-unreliable: true
+
+
+financebench_id_00566:
+  sector: Communication Services
+
+  company: Verizon
+  period: 2022
+  doc-type: 10k
+  doc: VERIZON_2022_10K
+
+  question-type: domain-relevant
+  question-reasoning: Numerical reasoning
+  domain-question-num: dg22
+  question: Has Verizon increased its debt on balance sheet between 2022 and the 2021
+    fiscal period?
+
+  answer: No. Verizon's debt decreased by $229 million.
+  justification: debt change = debt in 2022 - debt in 2021 = 150639 - 150868 = -229
+  page(s)-0based: 76
+  page(s): '77'
+
+  category: 1-COMPARE
+  correctness: >-
+    the answer concludes that debt decreased
+
+  evaluator-unreliable: true
+
+
+financebench_id_06247:
+  sector: Consumer Staples
+
+  company: Walmart
+  period: 2018
+  doc-type: 10k
+  doc: WALMART_2018_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: 'What is FY2018 days payable outstanding (DPO) for Walmart? DPO is defined
+    as: 365 * (average accounts payable between FY2017 and FY2018) / (FY2018 COGS
+    + change in inventory between FY2017 and FY2018). Round your answer to two decimal
+    places. Please base your judgments on the information provided primarily in the
+    statement of financial position and the P&L statement.'
+
+  answer: '42.69'
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Accounts payable. This metric was located in the 10K as a single line
+    item named: Accounts payable.
+
+
+    Metric 2: Inventories. This metric was located in the 10K as a single line item
+    named: Inventories.
+
+
+    Metric 3: Cost of goods sold. This metric was located in the 10K as a single line
+    item named: Cost of sales.'
+  page(s)-0based: 56
+  page(s): 57,59
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated Days Payable Outstanding (DPO) decimal value that is in the range from 35.00 to 50.00
+    (if the answer is a single number, assume that it is that calculated Days Payable Outstanding (DPO) decimal value)
+
+
+financebench_id_04784:
+  sector: Consumer Staples
+
+  company: Walmart
+  period: 2019
+  doc-type: 10k
+  doc: WALMART_2019_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: Based on the information provided primarily in the statement of income,
+    what is the FY2018 - FY2019 change in unadjusted operating income % margin for
+    Walmart? Answer in units of percents and round to one decimal place.
+
+  answer: 0.2%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating income.
+
+
+    Metric 2: Total revenue. This metric was located in the 10K as a single line item
+    named: Total revenues.'
+  page(s)-0based: 47
+  page(s): '48'
+
+  category: 3-CALC-COMPLEX
+  correctness: |-
+    the answer contains either:
+    - calculated Operating Income Margin percentage values for 2018 and 2019,
+      and their difference, which is a percentage value less than 0.5% in magnitude; or
+    - calculated Operating Income Margin decimal values for 2018 and 2019,
+      and their difference, which is a decimal value less than 0.005 in magnitude
+  answer-inadequate: true
+
+
+financebench_id_06741:
+  sector: Consumer Staples
+
+  company: Walmart
+  period: 2020
+  doc-type: 10k
+  doc: WALMART_2020_10K
+
+  question-type: metrics-generated
+  question-reasoning: Numerical reasoning
+  domain-question-num: ''
+  question: What is the FY2018 - FY2020 3 year average unadjusted EBITDA % margin
+    for Walmart? Define unadjusted EBITDA as unadjusted operating income + depreciation
+    and amortization from the cash flow statement. Answer in units of percents and
+    round to one decimal place. Calculate what was asked by utilizing the line items
+    clearly shown in the P&L statement and the cash flow statement.
+
+  answer: 6.2%
+  justification: 'The metric in question was calculated using other simpler metrics.
+    The various simpler metrics (from the current and, if relevant, previous fiscal
+    year(s)) used were:
+
+
+    Metric 1: Depreciation and amortization. This metric was located in the 10K as
+    a single line item named: Depreciation and amortization.
+
+
+    Metric 2: Unadjusted operating income. This metric was located in the 10K as a
+    single line item named: Operating income.
+
+
+    Metric 3: Total revenue. This metric was located in the 10K as a single line item
+    named: Total revenues.'
+  page(s)-0based: 50
+  page(s): 51,56
+
+  category: 3-CALC-COMPLEX
+  correctness: >-
+    the answer contains a calculated EBITDA Margin percentage value that is in the range from 5.50% to 6.50%,
+    or, alternatively, a calculated decimal value that is in the range from 0.0550 to 0.0650
+    (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value)
diff --git a/examples/FinanceBench-Lite/knowledge-store.txt b/examples/FinanceBench-Lite/knowledge-store.txt
new file mode 100644
index 000000000..e623a859d
--- /dev/null
+++ b/examples/FinanceBench-Lite/knowledge-store.txt
@@ -0,0 +1,45 @@
+Liquidity Metric Formulas
+-------------------------
+
+`(Net) Working Capital` = `(Total) Current Assets` - `(Total) Current Liabilities`
+
+`Working Capital Ratio` = `(Total) Current Assets` / `(Total) Current Liabilities`
+
+`Quick Ratio` = (
+  (`Cash & Cash Equivalents` +
+   `Short-Term Investments or (Current) Marketable Securities` +
+   `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`)
+  / `(Total) Current Liabilities`
+)
+
+`Operating Cash Flow Ratio` = (
+  `(Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows`
+  / `(Total) Current Liabilities`
+)
+
+`Free Cash Flow, a.k.a. FCF` = (
+  `(Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows` -
+  `Capital Expenditure(s), a.k.a. CapEx, or Capital Spending, or Property, Plant & Equipment (PP&E) Expenditure(s)/Purchase(s)`
+)
+
+`Free Cash Flow Conversion Ratio` = `Free Cash Flow, a.k.a. FCF` / `Earnings before Interest, Tax, Depreciation & Amortization, a.k.a. EBITDA`
+
+`Days Inventory Outstanding, a.k.a. DIO` = (
+  365 * `average (Total) (Net) Inventory(ies), typically between two consecutive fiscal year-ends`
+  / `(Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue`
+)
+
+`Days Payable Outstanding, a.k.a. DPO` = (
+  365 * `average Accounts Payable, typically between two consecutive fiscal year-ends`
+  / (`(Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue` +
+     `change in (Total) (Net) Inventory(ies), typically between two consecutive fiscal year-ends`)
+)
+
+`Days Sales Outstanding, a.k.a. DSO` = (
+  365 * `average (Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables, typically between two consecutive fiscal year-ends`
+  / `(Total) (Net) (Operating) Revenue(s), a.k.a. (Total) (Net) Sales`
+)
+
+`Cash Conversion Cycle, a.k.a. CCC` = (
+  `Days Inventory Outstanding, a.k.a. DIO` + `Days Sales Outstanding, a.k.a. DSO` - `Days Payable Outstanding, a.k.a. DPO`
+)
diff --git a/examples/FinanceBench-Lite/log.py b/examples/FinanceBench-Lite/log.py
new file mode 100644
index 000000000..874f12f53
--- /dev/null
+++ b/examples/FinanceBench-Lite/log.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+# import sys
+
+from loguru import logger
+
+from data_and_knowledge import FbId, DOC_NAMES_BY_FB_ID
+
+
+LOG_DIR_PATH: Path = Path(__file__).parent / '.log'  # '.log' directory alongside this module
+CURRENT_LOG_HANDLER_ID: int | None = None  # ID of the sink added by switch_log_file(); None before its first call
+
+
+# loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
+# logger.add(sink=sys.stdout, level='DEBUG',
+#            # format=...,
+#            filter=None,
+#            colorize=True,
+#            serialize=False,
+#            backtrace=True, diagnose=True,
+#            enqueue=False, context=None,
+#            catch=True)
+
+
+def switch_log_file(fb_id: FbId, output_name: str):
+    global CURRENT_LOG_HANDLER_ID  # pylint: disable=global-statement
+
+    if CURRENT_LOG_HANDLER_ID is not None:
+        logger.remove(handler_id=CURRENT_LOG_HANDLER_ID)
+
+    CURRENT_LOG_HANDLER_ID = logger.add(sink=(Path(LOG_DIR_PATH) /
+                                              DOC_NAMES_BY_FB_ID[fb_id] / fb_id[16:] / f'{output_name}.log'),
+                                        level='DEBUG',
+                                        # format=...,
+                                        filter=None,
+                                        colorize=True,
+                                        serialize=False,
+                                        backtrace=True, diagnose=True,
+                                        enqueue=False, context=None,
+                                        catch=True)
diff --git a/examples/FinanceBench-Lite/program-store.yml b/examples/FinanceBench-Lite/program-store.yml
new file mode 100644
index 000000000..36e65732c
--- /dev/null
+++ b/examples/FinanceBench-Lite/program-store.yml
@@ -0,0 +1,36 @@
+quick-ratio:
+  task: Assess liquidity health of {COMPANY} through its `Quick Ratio` as at {PERIOD} fiscal period end
+
+  sub-htps:
+    - task: |-
+        Calculate `Quick Ratio` of {COMPANY} as at {PERIOD} fiscal period end as decimal value according to formula:
+
+        `Quick Ratio` = (
+          (`Cash & Cash Equivalents` +
+           `Short-Term Investments or (Current) Marketable Securities` +
+           `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`)
+          / `(Total) Current Liabilities`
+        )
+
+      sub-htps:
+        # 1 single Retrieval task for multiple quantities on same statement, for both efficiency & mutual consistency;
+        # retrieve individual numerator & denominator balance values only, without taking division
+        # because RAG LMs may not be good at calculation & mathematical reasoning
+        - task: |-
+            What are values in dollars of:
+            - `Cash & Cash Equivalents`;
+            - `Short-Term Investments or (Current) Marketable Securities`;
+            - `(Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables`; and
+            - `(Total) Current Liabilities`
+            (or most similar-meaning reported line items to those)
+
+            on one same `(Consolidated) Balance Sheet, a.k.a. Statement of (Consolidated) Financial Position`
+            (or most similar-meaning statement) of {COMPANY}
+            (and NOT Balance Sheets of its acquired and/or divested companies)
+
+            as at {PERIOD} fiscal period end?
+
+    - task: |-
+        Compare calculated `Quick Ratio` decimal value against 1.00 and make assessment:
+        - `Quick Ratio` >= 1.00: liquidity is healthy; or
+        - `Quick Ratio` < 1.00: liquidity is not very healthy
diff --git a/examples/FinanceBench-Lite/rag-ground-truths.yml b/examples/FinanceBench-Lite/rag-ground-truths.yml
new file mode 100644
index 000000000..6ef352009
--- /dev/null
+++ b/examples/FinanceBench-Lite/rag-ground-truths.yml
@@ -0,0 +1,914 @@
+defs:
+
+  BS: (Consolidated) Balance Sheet, a.k.a. Statement of (Consolidated) Financial Position
+
+  cash-and-equiv: Cash & Cash Equivalents
+  st-invest: Short-Term Investments or (Current) Marketable Securities
+  recvables: (Net) Accounts Receivable, a.k.a. (Net) (Trade) Receivables
+  invent: (Total) (Net) Inventory(ies)
+  curr-assets: (Total) Current Assets
+  fixed-assets: (Net) Fixed Assets, a.k.a. (Net) Property, Plant & Equipment (PP&E)
+  total-assets: Total Assets
+
+  payables: Accounts Payable
+  st-debt: Short-Term Debt, or Current Portion of (Long-Term) Debt
+  curr-liabs: (Total) Current Liabilities
+  lt-debt: Long-Term Debt (EXCLUDING any current/short-term portion)
+
+
+  CF: (Consolidated) Cash Flow(s) Statement(s), a.k.a. (Consolidated) Statement(s) of Cash Flows
+
+  d&a: Depreciation & Amortization, a.k.a. D&A (of Fixed Assets or Property, Plant & Equipment (PP&E))
+  op-cf: (Net) Cash Flows from Operations, a.k.a. (Net) Operating Cash Flows
+
+  capex: Capital Expenditure(s), a.k.a. CapEx, or Capital Spending, or Property, Plant & Equipment (PP&E) Expenditure(s)/Purchase(s)
+
+  div: Cash Dividends
+
+
+  P&L: >-
+    (Consolidated) Income Statement, a.k.a. (Consolidated) Profit-and-Loss (P&L) Statement,
+    or (Consolidated) Earnings Statement, or (Consolidated) Operations Statement
+
+  rev: (Total) (Net) (Operating) Revenue(s), a.k.a. (Total) (Net) Sales
+  cogs: (Total) Cost of Goods Sold, a.k.a. (Total) COGS, or (Total) Cost of Sales, or (Total) Cost of Revenue
+  gross: Gross Income, a.k.a. Gross Profit, or Gross Earnings (or Loss(es))
+  op: (Unadjusted) Operating Income, a.k.a. Operating Profit, or Operating Earnings (or Loss(es))
+  ebitda: (Unadjusted) Earnings before Interest, Tax, Depreciation & Amortization, a.k.a. EBITDA
+  ebit: Earnings before Interest & Tax, a.k.a. EBIT
+  int: Interest Expense
+  ebt: Income or Profit or Earnings (or Loss(es)) before (Income) Tax(es)
+  inc-tax: (Income) Tax Expense
+  net: Net Income, a.k.a. Net Profit, or Net Earnings (or Loss(es)) (Attributable to Shareholders)
+
+
+ground-truths:
+
+  3M_2018_10K:
+    BS:
+      fixed-assets:
+        2018: 8,738 million or 8.7 billion
+        2017: 8,866 million or 8.9 billion  # unreliable
+
+
+  3M_2022_10K:
+    BS:
+      fixed-assets:
+        2022: 9,178 million
+        2021: 9,429 million
+
+      total-assets:
+        2022: 46,455 million
+        2021: 47,072 million
+
+    CF:
+      capex:
+        2022: 1,749 million
+        2021: 1,603 million
+        2020: 1,501 million  # unreliable
+
+    P&L:
+      rev:
+        2022: 34,229 million
+        2021: 35,355 million
+        2020: 32,184 million
+
+      net:
+        2022: 5,777 million
+        2021: 5,921 million
+        2020: 5,449 million
+
+
+  3M_2023Q2_10Q:
+    BS:
+      cash-and-equiv:
+        2023Q2: 4,258 million  # unreliable
+        2022: 3,655 million
+
+      st-invest:
+        2023Q2: 56 million
+        2022: 238 million
+
+      recvables:
+        2023Q2: 4,947 million
+        2022: 4,532 million
+
+      invent:
+        2023Q2: 5,280 million
+        2022: 5,372 million
+
+      curr-assets:
+        2023Q2: 15,754 million
+        2022: 14,688 million
+
+      curr-liabs:
+        2023Q2: 10,936 million
+        2022: 9,523 million
+
+
+  ACTIVISIONBLIZZARD_2019_10K:
+    BS:
+      fixed-assets:
+        2019: 253 million
+        2018: 282 million
+
+    CF:
+      capex:
+        2019: 116 million
+        2018: 131 million
+        2017: 155 million  # unreliable
+
+    P&L:
+      rev:
+        2019: 6,489 million
+        2018: 7,500 million
+        2017: 7,017 million
+
+
+  ADOBE_2015_10K:
+    BS:
+      curr-liabs:
+        2015: 2,213.556 million or 2,213.6 million or 2.21 billion or 2.2 billion
+        2014: 2,494.435 million or 2,494.4 million or 2.49 billion or 2.5 billion
+
+    CF:
+      op-cf:
+        2015: 1,469.502 million or 1,469.5 million or 1.47 billion or 1.5 billion
+        2014: 1,287.482 million or 1,287.5 million or 1.29 billion or 1.3 billion
+        2013: 1,151.686 million or 1,151.7 million or 1.15 billion or 1.2 billion
+
+
+  ADOBE_2016_10K:
+    P&L:
+      op:
+        2016: 1,493.602 million or 1,493.6 million or 1.49 billion or 1.5 billion  # unreliable
+        2015: 903.095 million or 903.1 million or 0.9 billion  # unreliable
+        2014: 412.685 million or 412.7 million or 0.41 billion or 0.4 billion  # unreliable
+
+
+  ADOBE_2017_10K:
+    BS:
+      curr-liabs:
+        2017: 3,527.457 million or 3,527.5 million or 3.53 billion or 3.5 billion
+        2016: 2,811.635 million or 2,811.6 million or 2.81 billion or 2.8 billion
+
+    CF:
+      op-cf:
+        2017: 2,912.853 million or 2,912.9 million or 2.91 billion or 2.9 billion
+        2016: 2,199.728 million or 2,199.7 million or 2.2 billion
+        2015: 1,469.502 million or 1,469.5 million or 1.47 billion or 1.5 billion  # unreliable
+
+
+  ADOBE_2022_10K:
+    CF:
+      op-cf:
+        2022: 7,838 million
+        2021: 7,230 million
+        2020: 5,727 million
+
+      capex:
+        2022: 442 million  # unreliable
+        2021: 348 million  # unreliable
+        2020: 419 million  # unreliable
+
+    P&L:
+      rev:
+        2022: 17,606 million  # unreliable
+        2021: 15,785 million
+        2020: 12,868 million
+
+      op:
+        2022: 6,098 million
+        2021: 5,802 million
+        2020: 4,237 million
+
+      net:
+        2022: 4,756 million
+        2021: 4,822 million
+        2020: 5,260 million
+
+
+  AES_2022_10K:
+    BS:
+      invent:
+        2022: 1,055 million
+        2021: 604 million
+
+      total-assets:
+        2022: 38,363 million
+        2021: 32,963 million
+
+    P&L:
+      cogs:
+        2022: 10,069 million  # unreliable
+        2021: 8,430 million  # unreliable
+        2020: 6,967 million  # unreliable
+
+      net:
+        2022: negative (loss) 546 million
+        2021: negative (loss) 409 million  # unreliable
+        2020: 46 million
+
+
+  AMAZON_2017_10K:
+    BS:
+      invent:
+        2017: 16,047 million
+        2016: 11,461 million  # unreliable
+
+      payables:
+        2017: 34,616 million
+        2016: 25,309 million
+
+    P&L:
+      rev:
+        2017: 177,866 million
+        2016: 135,987 million
+        2015: 107,006 million
+
+      cogs:
+        2017: 111,934 million  # unreliable: often mistaken for Total Operating Expenses $173,760 million
+        2016: 88,265 million  # unreliable: often mistaken for Total Operating Expenses $131,801 million
+        2015: 71,651 million
+
+
+  AMCOR_2020_10K:
+    BS:
+      recvables:
+        2020: 1,615.9 million  # unreliable
+        2019: 1,864.3 million  # unreliable
+
+
+  AMCOR_2023_10K:
+    BS:
+      cash-and-equiv:
+        2023: 689 million
+        2022: 775 million
+
+      st-invest:
+        2023: 0 (or not explicitly reported)
+        2022: 0 (or not explicitly reported)
+
+      recvables:
+        2023: 1,875 million  # unreliable
+        2022: 1,935 million
+
+      invent:
+        2023: 992 million + 1,221 million, or 2,213 million
+        2022: 1,114 million + 1,325 million, or 2,439 million
+
+      curr-assets:
+        2023: 5,308 million
+        2022: 5,853 million
+
+      curr-liabs:
+        2023: 4,476 million
+        2022: 5,103 million
+
+    P&L:
+      rev:
+        2023: 14,694 million
+        2022: 14,544 million
+        2021: 12,861 million
+
+      gross:
+        2023: 2,725 million
+        2022: 2,820 million
+        2021: 2,732 million
+
+
+  AMCOR_2023Q4_EARNINGS:
+    P&L:
+      rev:
+        2023Q4: 3,673 million
+        2023FY: 14,694 million
+        2022Q4: 3,909 million
+        2022FY: 14,544 million
+
+      ebitda:
+        2023Q4: 540 million  # unreliable: FY & Quarter numbers often mistaken for each other
+        2023FY: 2,018 million  # unreliable: FY & Quarter numbers often mistaken for each other
+
+
+  AMD_2015_10K:
+    CF:
+      d&a:
+        2015: 167 million
+        2014: 203 million
+        2013: 236 million
+
+    P&L:
+      rev:
+        2015: 3,991 million
+        2014: 5,506 million
+        2013: 5,299 million
+
+
+  AMD_2022_10K:
+    BS:
+      cash-and-equiv:
+        2022: 4,835 million  # unreliable
+        2021: 2,535 million  # unreliable
+
+      st-invest:
+        2022: 1,020 million
+        2021: 1,073 million
+
+      recvables:
+        2022: 4,126 million  # unreliable
+        2021: 2,706 million  # unreliable
+
+      invent:
+        2022: 3,771 million
+        2021: 1,955 million  # unreliable
+
+      curr-assets:
+        2022: 15,019 million
+        2021: 8,583 million
+
+      curr-liabs:
+        2022: 6,369 million
+        2021: 4,240 million
+
+
+  AMERICANWATERWORKS_2021_10K:
+    CF:
+      d&a:
+        2021: 636 million  # unreliable
+        2020: 604 million  # unreliable
+        2019: 582 million  # unreliable
+
+    P&L:
+      op:
+        2021: 1,196 million
+        2020: 1,248 million
+        2019: 1,214 million
+
+
+  AMERICANWATERWORKS_2022_10K:
+    BS:
+      curr-assets:
+        2022: 1,250 million
+        2021: 1,554 million
+
+      curr-liabs:
+        2022: 2,811 million
+        2021: 2,141 million
+
+
+  BESTBUY_2017_10K:
+    P&L:
+      rev:
+        2017: 39,403 million
+        2016: 39,528 million
+        2015: 40,339 million
+
+      net:
+        2017: 1,228 million  # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $1,207m
+        2016: 897 million  # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $807m
+        2015: 1,233 million  # unreliable: often mistaken for Net Earnings (Loss) from Continuing Operations $1,246m
+
+
+  BESTBUY_2019_10K:
+    BS:
+      invent:
+        2019: 5,409 million
+        2018: 5,209 million
+
+
+  BESTBUY_2023_10K:
+    P&L:
+      rev:
+        2023: 46,298 million or 46.3 billion
+        2022: 51,761 million or 51.8 billion
+        2021: 47,262 million or 47.3 billion
+
+      gross:
+        2023: 9,912 million or 9.9 billion  # unreliable
+        2022: 11,640 million or 11.6 billion
+        2021: 10,573 million or 10.6 billion
+
+
+  BLOCK_2016_10K:
+    BS:
+      curr-assets:
+        2016: 1,001,425 or 1,001.4 million or 1.0 billion
+        2015: 705,563 or 705.6 million or 0.7 billion
+
+      curr-liabs:
+        2016: 577,464 or 577.5 million or 0.6 billion  # unreliable
+        2015: 334,202 or 334.2 million or 0.3 billion  # unreliable
+
+
+  BOEING_2018_10K:
+    BS:
+      fixed-assets:
+        2018: 12,645 million  # unreliable: 2018 & 2017 numbers often mixed up
+        2017: 12,672 million  # unreliable: 2018 & 2017 numbers often mixed up
+
+
+  BOEING_2022_10K:
+    P&L:
+      rev:
+        2022: 66,608 million
+        2021: 62,286 million
+        2020: 58,158 million
+
+      gross:
+        2022: 3,502 million  # unreliable because of missing line-item label
+        2021: 3,017 million  # unreliable because of missing line-item label
+        2020: negative (loss) 5,685 million  # unreliable because of missing line-item label
+
+      ebt:
+        2022: negative (loss) 5,022 million
+        2021: negative (loss) 5,033 million
+        2020: negative (loss) 14,476 million
+
+      inc-tax:
+        2022: tax of 31 million
+        2021: tax benefit of 743 million
+        2020: tax benefit of 2,535 million
+
+
+  COCACOLA_2017_10K:
+    BS:
+      total-assets:
+        2017: 36,545 million  # unreliable
+        2016: 34,010 million  # unreliable
+
+    P&L:
+      net:
+        2017: 1,248 million
+        2016: 6,527 million
+        2015: 7,351 million
+
+
+  COCACOLA_2021_10K:
+    P&L:
+      rev:
+        2021: 38,655 million
+        2020: 33,014 million
+        2019: 37,266 million
+
+      cogs:
+        2021: 15,357 million
+        2020: 13,433 million  # unreliable
+        2019: 14,619 million  # unreliable
+
+
+  COCACOLA_2022_10K:
+    CF:
+      div:
+        2022: 7,616 million
+        2021: 7,252 million
+        2020: 7,047 million
+
+    P&L:
+      net:
+        2022: 9,542 million
+        2021: 9,771 million
+        2020: 7,747 million
+
+
+  CORNING_2020_10K:
+    BS:
+      invent:
+        2020: 2,438 million
+        2019: 2,320 million
+
+      payables:
+        2020: 1,174 million  # unreliable: often mistaken for Other Accrued Liabilities $2,437m
+        2019: 1,587 million  # unreliable: often mistaken for Other Accrued Liabilities $1,923m
+
+    P&L:
+      cogs:
+        2020: 7,772 million  # unreliable: often failing to be retrieved at all
+        2019: 7,468 million  # unreliable: often failing to be retrieved at all
+        2018: 6,829 million  # unreliable: often failing to be retrieved at all
+
+
+  CORNING_2021_10K:
+    P&L:
+      rev:
+        2021: 14,082 million  # unreliable
+        2020: 11,303 million
+        2019: 11,503 million
+
+      op:
+        2021: 2,112 million
+        2020: 509 million
+        2019: 1,306 million
+
+
+  CORNING_2022_10K:
+    BS:
+      curr-assets:
+        2022: 7,453 million
+        2021: 7,659 million
+
+      curr-liabs:
+        2022: 5,175 million
+        2021: 4,806 million
+
+
+  CVSHEALTH_2018_10K:
+    BS:
+      fixed-assets:
+        2018: 11,349 million  # unreliable: often failing to be retrieved at all
+        2017: 10,292 million  # unreliable: often failing to be retrieved at all
+
+    P&L:
+      rev:
+        2018: 194,579 million  # unreliable: often mistaken for Pharmacy Services 2018 revenue $134,128m or Retail/LTC 2018 revenue $83,989m
+        2017: 184,786 million  # unreliable: often mistaken for Pharmacy Services 2017 revenue $130,601m
+        2016: 177,546 million
+
+
+  CVSHEALTH_2022_10K:
+    BS:
+      fixed-assets:
+        2022: 12,873 million  # unreliable
+        2021: 12,896 million
+
+      total-assets:
+        2022: 228,275 million
+        2021: 232,999 million
+
+    CF:
+      capex:
+        2022: 2,727 million or 2.7 billion
+        2021: 2,520 million or 2.5 billion
+        2020: 2,437 million or 2.4 billion
+
+    P&L:
+      rev:
+        2022: 322,467 million
+        2021: 292,111 million
+        2020: 268,706 million
+
+      net:
+        2022: 4,149 million
+        2021: 7,910 million  # unreliable
+        2020: 7,179 million  # unreliable
+
+
+  GENERALMILLS_2019_10K:
+    BS:
+      recvables:
+        2019: 1,679.7 million
+        2018: 1,684.2 million  # unreliable
+
+      invent:
+        2019: 1,559.3 million
+        2018: 1,642.2 million  # unreliable
+
+      payables:
+        2019: 2,854.1 million
+        2018: 2,746.2 million  # unreliable
+
+    P&L:
+      rev:
+        2019: 16,865.2 million
+        2018: 15,740.4 million
+        2017: 15,619.8 million
+
+      cogs:
+        2019: 11,108.4 million
+        2018: 10,304.8 million
+        2017: 10,052.0 million
+
+
+  GENERALMILLS_2020_10K:
+    BS:
+      curr-assets:
+        2020: 5,121.3 million
+        2019: 4,186.5 million
+
+      curr-liabs:
+        2020: 7,491.5 million
+        2019: 7,087.1 million
+
+    CF:
+      op-cf:
+        2020: 3,676.2 million
+        2019: 2,807.0 million
+        2018: 2,841.0 million
+
+      capex:
+        2020: 460.8 million
+        2019: 537.6 million
+        2018: 622.7 million
+
+
+  GENERALMILLS_2022_10K:
+    CF:
+      div:
+        2022: 1,244.5 million
+        2021: 1,246.4 million
+        2020: 1,195.8 million
+
+    P&L:
+      net:
+        2022: 2,707.3 million  # unreliable
+        2021: 2,339.8 million  # unreliable
+        2020: 2,181.2 million  # unreliable
+
+
+  JOHNSON_JOHNSON_2022_10K:
+    BS:
+      invent:
+        2022: 12,483 million
+        2021: 10,387 million
+
+    P&L:
+      cogs:
+        2022: 31,089 million
+        2021: 29,855 million
+        2020: 28,427 million
+
+
+  KRAFTHEINZ_2019_10K:
+    BS:
+      invent:
+        2019: 2,721 million
+        2018: 2,667 million
+
+    P&L:
+      cogs:
+        2019: 16,830 million
+        2018: 17,347 million  # unreliable
+        2017: 17,043 million
+
+
+  LOCKHEEDMARTIN_2020_10K:
+    BS:
+      total-assets:
+        2020: 50,710 million
+        2019: 47,528 million
+
+    P&L:
+      rev:
+        2020: 65,398 million
+        2019: 59,812 million  # unreliable
+        2018: 53,762 million
+
+
+  LOCKHEEDMARTIN_2021_10K:
+    BS:
+      curr-assets:
+        2021: 19,815 million
+        2020: 19,378 million
+
+      curr-liabs:
+        2021: 13,997 million
+        2020: 13,933 million
+
+
+  LOCKHEEDMARTIN_2022_10K:
+    P&L:
+      rev:
+        2022: 65,984 million
+        2021: 67,044 million
+        2020: 65,398 million
+
+
+  MGMRESORTS_2018_10K:
+    BS:
+      payables:
+        2018: 302.578 million or 302.6 million or 0.3 billion
+        2017: 255.028 million or 255 million or 0.26 billion or 0.3 billion
+
+
+  MGMRESORTS_2020_10K:
+    CF:
+      capex:
+        2020: 270.579 million or 271 million
+        2019: 739.006 million or 739 million  # unreliable
+        2018: 1,486.843 million or 1,487 million  # unreliable
+
+    P&L:
+      rev:
+        2020: 5,162.082 million or 5,162 million
+        2019: 12,899.672 million or 12,900 million  # unreliable
+        2018: 11,763.096 million or 11,763 million
+
+
+  # MGMRESORTS_2022Q4_EARNINGS:
+    # P&L:
+      # ebit:
+      # int:
+
+
+  MICROSOFT_2016_10K:
+    P&L:
+      cogs:
+        2016: 32,780 million  # unreliable
+        2015: 33,038 million  # unreliable
+        2014: 27,078 million  # unreliable
+
+
+  MICROSOFT_2023_10K:
+    BS:
+      st-debt:
+        2023: 5,247 million
+        2022: 2,749 million
+
+      lt-debt:
+        2023: 41,990 million
+        2022: 47,032 million
+
+
+  NETFLIX_2015_10K:
+    CF:
+      d&a:
+        2015: 62.283 million or 62 million  # unreliable: often failing to be retrieved at all
+        2014: 54.028 million or 54 million  # unreliable: often failing to be retrieved at all
+        2013: 48.374 million or 48 million  # unreliable: often failing to be retrieved at all
+
+    P&L:
+      rev:
+        2015: 6,779.511 million or 6,780 million
+        2014: 5,504.656 million or 5,505 million
+        2013: 4,374.562 million or 4,375 million
+
+      op:
+        2015: 305.826 million or 306 million
+        2014: 402.648 million or 403 million
+        2013: 228.347 million or 228 million
+
+
+  NIKE_2018_10K:
+    P&L:
+      rev:
+        2018: 36,397 million
+        2017: 34,350 million
+        2016: 32,376 million
+
+      cogs:
+        2018: 20,441 million
+        2017: 19,038 million
+        2016: 17,405 million
+
+
+  NIKE_2021_10K:
+    BS:
+      invent:
+        2021: 6,854 million
+        2020: 7,367 million
+
+    P&L:
+      cogs:
+        2021: 24,576 million
+        2020: 21,162 million  # unreliable
+        2019: 21,643 million
+
+
+  PAYPAL_2022_10K:
+    BS:
+      curr-assets:
+        2022: 57,517 million
+        2021: 52,574 million
+
+      curr-liabs:
+        2022: 45,101 million
+        2021: 43,029 million
+
+
+  PEPSICO_2021_10K:
+    CF:
+      capex:
+        2021: 4,625 million
+        2020: 4,240 million
+        2019: 4,232 million
+
+
+  PEPSICO_2022_10K:
+    CF:
+      d&a:
+        2022: 2,763 million  # unreliable
+        2021: 2,710 million  # unreliable
+        2020: 2,548 million
+
+      capex:
+        2022: 5,207 million
+        2021: 4,625 million
+        2020: 4,240 million
+
+    P&L:
+      rev:
+        2022: 86,392 million  # unreliable
+        2021: 79,474 million  # unreliable
+        2020: 70,372 million  # unreliable
+
+      op:
+        2022: 11,512 million
+        2021: 11,162 million
+        2020: 10,080 million
+
+
+  PFIZER_2021_10K:
+    BS:
+      fixed-assets:
+        2021: 14,882 million  # unreliable
+        2020: 13,745 million  # unreliable
+
+
+  VERIZON_2022_10K:
+    BS:
+      cash-and-equiv:
+        2022: 2,605 million
+        2021: 2,921 million
+
+      st-invest:
+        2022: 0 (or not explicitly reported)
+        2021: 0 (or not explicitly reported)
+
+      recvables:
+        2022: 24,506 million  # unreliable
+        2021: 23,846 million  # unreliable
+
+      invent:
+        2022: 2,388 million
+        2021: 3,055 million
+
+      curr-assets:
+        2022: 37,857 million
+        2021: 36,728 million
+
+      fixed-assets:
+        2022: 107,434 million
+        2021: 99,696 million
+
+      total-assets:
+        2022: 379,680 million
+        2021: 366,596 million
+
+      curr-liabs:
+        2022: 50,171 million
+        2021: 47,160 million
+
+    CF:
+      capex:
+        2022: 23,087 million  # unreliable
+        2021: 20,286 million  # unreliable
+        2020: 18,192 million  # unreliable
+
+    P&L:
+      rev:
+        2022: 136,835 million
+        2021: 133,613 million
+        2020: 128,292 million
+
+      net:
+        2022: 21,256 million  # unreliable
+        2021: 22,065 million
+        2020: 17,801 million
+
+
+  WALMART_2018_10K:
+    BS:
+      invent:
+        2018: 43,783 million
+        2017: 43,046 million
+
+      payables:
+        2018: 46,092 million
+        2017: 41,433 million
+
+    P&L:
+      cogs:
+        2018: 373,396 million  # unreliable
+        2017: 361,256 million  # unreliable
+        2016: 360,984 million  # unreliable
+
+
+  WALMART_2019_10K:
+    P&L:
+      rev:
+        2019: 514,405 million  # unreliable
+        2018: 500,343 million  # unreliable
+        2017: 485,873 million
+
+      op:
+        2019: 21,957 million
+        2018: 20,437 million
+        2017: 22,764 million  # unreliable
+
+
+  WALMART_2020_10K:
+    CF:
+      d&a:
+        2020: 10,987 million
+        2019: 10,678 million
+        2018: 10,529 million
+
+    P&L:
+      rev:
+        2020: 523,964 million  # unreliable
+        2019: 514,405 million  # unreliable
+        2018: 500,343 million
+
+      op:
+        2020: 20,568 million
+        2019: 21,957 million
+        2018: 20,437 million
diff --git a/examples/FinanceBench-Lite/util.py b/examples/FinanceBench-Lite/util.py
new file mode 100644
index 000000000..3025beadb
--- /dev/null
+++ b/examples/FinanceBench-Lite/util.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+from functools import wraps
+from typing import TYPE_CHECKING
+
+from loguru import logger
+from tqdm import tqdm
+
+from data_and_knowledge import FbId, Answer, FB_IDS, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID, OUTPUT_FILE_PATH, get_or_create_output_df  # noqa: E501
+from eval import eval_correctness, eval_all
+from log import switch_log_file
+
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
+
+type QAFunc = Callable[[FbId], Answer]  # a QA function: maps an `FbId` to an `Answer`
+
+
+@dataclass
+class enable_batch_qa_and_eval:  # noqa: N801
+    output_name: str
+
+    def __call__(self, qa_func: QAFunc) -> QAFunc:
+        @wraps(wrapped=qa_func)
+        def decorated_qa_func(fb_id: FbId) -> Answer | None:
+            if 'all' in fb_id.lower():
+                for _fb_id in tqdm(FB_IDS):
+                    # run inferencing and preliminarily evaluate
+                    eval_correctness(fb_id=_fb_id, answer=qa_func(_fb_id), output_name=self.output_name, human=False)
+
+                # rigorously evaluate again, including human evaluation for difficult cases
+                eval_all(output_name=self.output_name, refresh=True)
+                return None
+
+            if 'from:' in fb_id.lower():
+                for _fb_id in tqdm(FB_IDS[FB_IDS.index(fb_id[5:]):]):
+                    # run inferencing and preliminarily evaluate
+                    eval_correctness(fb_id=_fb_id, answer=qa_func(_fb_id), output_name=self.output_name, human=False)
+
+                # rigorously evaluate again, including human evaluation for difficult cases
+                eval_all(output_name=self.output_name, refresh=True)
+                return None
+
+            # run inferencing and evaluate
+            eval_correctness(fb_id=fb_id, answer=(answer := qa_func(fb_id)), output_name=self.output_name, human=True)
+            return answer
+
+        return decorated_qa_func
+
+
+@dataclass
+class log_qa_and_update_output_file:  # noqa: N801
+    output_name: str
+
+    def __call__(self, qa_func: QAFunc) -> QAFunc:
+        @wraps(wrapped=qa_func)
+        def decorated_qa_func(fb_id: FbId) -> Answer:
+            switch_log_file(fb_id=fb_id, output_name=self.output_name)
+
+            logger.info((question := f'\n{fb_id}\n{DOC_NAMES_BY_FB_ID[fb_id]}:\n{QS_BY_FB_ID[fb_id]}\n') +
+                        '\n... solving process starting ...\n',
+                        depth=1)
+
+            logger.info(question + (f'\n{self.output_name.upper()}:\n'
+                                    f'{(answer := qa_func(fb_id)).replace('{', '{{').replace('}', '}}')}\n'),
+                        depth=1)
+
+            output_df: DataFrame = get_or_create_output_df()
+            output_df.loc[fb_id, self.output_name]: str = answer
+            output_df.to_csv(OUTPUT_FILE_PATH, index=True)
+
+            return answer
+
+        return decorated_qa_func
diff --git a/examples/Planning-and-Reasoning.ipynb b/examples/Planning-and-Reasoning.ipynb
deleted file mode 100644
index f85d915e9..000000000
--- a/examples/Planning-and-Reasoning.ipynb
+++ /dev/null
@@ -1,257 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Problem-Solving Agent with Planning, Reasoning & Domain Knowledge: illustrative example using `FinanceBench` financial-analysis dataset\n",
-    "\n",
-    "This notebook illustrates the use of `OpenSSA`'s `Agent` and its planning, reasoning & domain knowledge integration capabilities to solve a problem in the financial-analysis domain."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Setups"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pprint import pprint\n",
-    "from IPython.display import display, Markdown"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "if cwd_is_root := ('examples' in os.listdir()):\n",
-    "    sys.path.append('examples')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "from dotenv import load_dotenv\n",
-    "\n",
-    "load_dotenv(dotenv_path=Path('examples' if cwd_is_root else '.') / '.env')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Imports of Agent, Planning, Reasoning & Resource classes from `OpenSSA`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openssa import (Agent,\n",
-    "                     HTP, AutoHTPlanner,\n",
-    "                     OodaReasoner,\n",
-    "                     FileResource)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Problem to Solve and Knowledge & Resource available for use"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# problem to solve\n",
-    "PROBLEM = 'Does AMD have a healthy liquidity profile based on FY22 Quick Ratio?'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# available domain knowledge (stored as string)\n",
-    "from FinanceBench.data_and_knowledge import EXPERT_KNOWLEDGE as FINANCIAL_KNOWLEDGE"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# available informational resource: AMD's 2022 10K filing\n",
-    "\n",
-    "from FinanceBench.data_and_knowledge import Doc as FinancialDoc\n",
-    "\n",
-    "AMD_2022_10K = FileResource(path=FinancialDoc('AMD_2022_10K').dir_path)\n",
-    "\n",
-    "display(Markdown(AMD_2022_10K.overview))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Problem-Solving by Agent with Hierarchical Task Planning (HTP) & OODA Reasoning (OODAR)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "agent = Agent(planner=AutoHTPlanner(max_depth=2, max_subtasks_per_decomp=3),\n",
-    "              reasoner=OodaReasoner(),\n",
-    "              resources={AMD_2022_10K})"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Problem-Solving with Automated Dynamic Planning (default)\n",
-    "\n",
-    "Without additional domain knowledge and expert inputs, the `agent` can attempt to solve the stated problem by using its Planner to decompose the problem into a 1-level-deep sub-task plan and execute that plan using its OODA Reasoner.\n",
-    "\n",
-    "At any point during the OODA reasoning execution, if a confident answer cannot be established for the concerned sub-task, the `agent` would use the Planner again to decompose that sub-task 1 level further. This recursive decomposition can be done up to the `agent`'s maximum allowed planning depth.\n",
-    "\n",
-    "This default solving mechanism provides a baseline that is often acceptable for domains that are popularly known/understood."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution_from_auto_plan_dynamically_executed = agent.solve(PROBLEM)\n",
-    "\n",
-    "display(Markdown(solution_from_auto_plan_dynamically_executed))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Problem-Solving with Expert-Guided Planning\n",
-    "\n",
-    "One way to make the solution highly accurate and reliable is to provide the `agent` with plan from a knowledgeable expert:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "expert_plan = HTP.from_dict(\n",
-    "    {\n",
-    "        'task': PROBLEM,\n",
-    "        'sub-plans': [\n",
-    "            {\n",
-    "                'task': 'calculate Quick Ratio conservatively as (`Cash & Cash Equivalents` + `Accounts Receivable`) / Current Liabilities',\n",
-    "                'sub-plans': [\n",
-    "                    {\n",
-    "                        'task': 'retrieve `Cash & Cash Equivalents`, `Accounts Receivable` & `Current Liabilities` from Balance Sheet'\n",
-    "                    },\n",
-    "                ]\n",
-    "            },\n",
-    "            {\n",
-    "                'task': 'see whether Quick Ratio is healthy, i.e. greater than 1'\n",
-    "            },\n",
-    "        ]\n",
-    "    }\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "expert_guided_solution = agent.solve(PROBLEM, plan=expert_plan)\n",
-    "\n",
-    "display(Markdown(expert_guided_solution))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Problem-Solving with Domain Knowledge Injection\n",
-    "\n",
-    "If expert-guided solution plans are not readily available in your use case, another and sometimes lighter-weight way to achieve consistently good problem-solving outcomes is to give the `agent` access to domain-specific knowledge, so that such knowledge can be used for constructing effective solution plans for problems in the concerned domain, and for reasoning accurately during the execution process:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "agent_with_knowledge = Agent(planner=AutoHTPlanner(max_depth=2, max_subtasks_per_decomp=3),\n",
-    "                             reasoner=OodaReasoner(),\n",
-    "                             knowledge={FINANCIAL_KNOWLEDGE},\n",
-    "                             resources={AMD_2022_10K})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution_from_auto_plan_dynamically_executed_with_knowledge = agent_with_knowledge.solve(PROBLEM, dynamic=False)\n",
-    "\n",
-    "display(Markdown(solution_from_auto_plan_dynamically_executed_with_knowledge))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/Tutorial.ipynb b/examples/Tutorial.ipynb
deleted file mode 100644
index 04d9e4a85..000000000
--- a/examples/Tutorial.ipynb
+++ /dev/null
@@ -1,783 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Build an AI Agent with SEC Filing Insights in Just 10 Minutes Using OpenSSA\n",
-    "--------------"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### In this tutorial, you will learn how to:\n",
-    "\n",
-    "1. Build an AI Agent from scratch with Hierachichy Task Planing (HTP) using openSSA\n",
-    "2. Improve agent's performance by:\n",
-    "    - Incorporating external knowledge source\n",
-    "    - Providing customized plan from the expert\n",
-    "    - Enabling dynamic solving capability\n",
-    "\n",
-    "### By the end of this tutorial, you will understand:\n",
-    "- What is HTP and how it works?\n",
-    "- How to customize OpenSSA components to solve your complex problem?"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Setups"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's start by impporting the neccessary dependencies."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 64,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
-   "source": [
-    "%load_ext autoreload\n",
-    "%autoreload"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pathlib import Path\n",
-    "from pprint import pprint\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "from IPython.display import display, Markdown\n",
-    "from dotenv import load_dotenv\n",
-    "import yaml\n",
-    "\n",
-    "from openssa import Agent, HTP, AutoHTPlanner, OodaReasoner, FileResource\n",
-    "from openssa.utils.llms import OpenAILLM\n",
-    "from openssa.l2.task import Task"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Make sure you plave your OpenAI API key in `example/.env`\n",
-    "\n",
-    "```\n",
-    "OPENAI_API_KEY=...\n",
-    "```\n",
-    "\n",
-    "[Where do I find my OpenAI API Key?](https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# make sure we're in the right folder\n",
-    "if cwd_is_root := ('examples' in os.listdir()):\n",
-    "    sys.path.append('examples')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 66,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Sanity check if we have the OpenAI API setup:  True\n"
-     ]
-    }
-   ],
-   "source": [
-    "print('Sanity check if we have the OpenAI API setup: ', load_dotenv(dotenv_path=Path('examples' if cwd_is_root else '.') / '.env'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# util function to summarize answer\n",
-    "def summarize_ans(ans, max_tokens=100):\n",
-    "    llm=OpenAILLM()\n",
-    "    response = llm.call(\n",
-    "        messages=[\n",
-    "            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
-    "            {\"role\": \"user\", \"content\": \"Please summarize the following text into 1-2 sentences: \" + ans}\n",
-    "        ],\n",
-    "        max_tokens=max_tokens,\n",
-    "        temperature=0.7\n",
-    "    )\n",
-    "    summary = response.choices[0].message.content\n",
-    "    return summary"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 120,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# util function to print results\n",
-    "import textwrap\n",
-    "\n",
-    "def namestr(obj, namespace):\n",
-    "    return [name for name in namespace if namespace[name] is obj]\n",
-    "\n",
-    "def print_solution(sol, present_full_answer=False):\n",
-    "    agent_name = namestr(sol, globals())[0].upper().replace('_', ' ')\n",
-    "    # print(agent_name)\n",
-    "    print('PROBLEM: ')\n",
-    "    print('='*80)\n",
-    "    print(PROBLEM, '\\n')\n",
-    "    if GROUND_TRUTH_ANSWER:\n",
-    "        print('GROUND TRUTH ANSWER: ')\n",
-    "        print('='*80)\n",
-    "        print(GROUND_TRUTH_ANSWER, '\\n')\n",
-    "    if present_full_answer:\n",
-    "        print(f'{agent_name} FULL:')\n",
-    "        print('='*80)\n",
-    "        print(textwrap.fill(sol, 80))\n",
-    "    else:\n",
-    "        print(f'{agent_name} SUMMARIZED:')\n",
-    "        print('='*80)\n",
-    "        print(textwrap.fill(summarize_ans(sol), 80))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Data preparation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We're going to use the FinanceBench dataset to demonstrate. FinanceBench is a dataset to benchmark question answering capability in financial domain.\n",
-    "\n",
-    "We have loaded a sample SEC filing for 3M from 2022. \n",
-    "https://github.com/patronus-ai/financebench/blob/main/pdfs/3M_2022_10K.pdf\n",
-    "\n",
-    "- Let's look at a sample question: \n",
-    "\n",
-    "`Is 3M a capital-intensive business based on FY2022 data`\n",
-    "\n",
-    "- The expected answer for this question is:\n",
-    "\n",
-    "`No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-    "    which is evident from below key metrics:\n",
-    "    CAPEX/Revenue Ratio: 5.1%\n",
-    "    Fixed assets/Total Assets: 20%\n",
-    "    Return on Assets= 12.4%`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "DOC_PATH = 'sample_data/3M_2022_10K/'\n",
-    "PROBLEM = 'Is 3M a capital-intensive business based on FY2022 data?'\n",
-    "GROUND_TRUTH_ANSWER ='''\n",
-    "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-    "    which is evident from below key metrics:\n",
-    "    CAPEX/Revenue Ratio: 5.1%\n",
-    "    Fixed assets/Total Assets: 20%\n",
-    "    Return on Assets= 12.4%'''"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now, we'll build an agent from scracth using [OpenSSA](https://www.openssa.org/)."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Build an AI Agent from Scratch Using OpenSSA\n",
-    "------------"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Base Agent"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's build our first agent with all default settings. \n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/base-agent.png\" height=\"100\" />"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To build an agent, the first and most basic resource we need is a document. We will learn how to enable hierarchical task planning (HTP) capability and how to customize it's component later. Let's first build a `Base Agent`` with only the document we've prepared in the previous block and see how well it can solve the question. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Build a base agent\n",
-    "base_agent = Agent(planner=None,\n",
-    "                   reasoner=OodaReasoner(),\n",
-    "                   knowledge=None,\n",
-    "                   resources={FileResource(path=DOC_PATH)})\n",
-    "\n",
-    "base_agent_answer = base_agent.solve(problem=PROBLEM,\n",
-    "                                       plan=None,\n",
-    "                                       dynamic=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 121,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PROBLEM: \n",
-      "================================================================================\n",
-      "Is 3M a capital-intensive business based on FY2022 data? \n",
-      "\n",
-      "GROUND TRUTH ANSWER: \n",
-      "================================================================================\n",
-      "\n",
-      "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-      "    which is evident from below key metrics:\n",
-      "    CAPEX/Revenue Ratio: 5.1%\n",
-      "    Fixed assets/Total Assets: 20%\n",
-      "    Return on Assets= 12.4% \n",
-      "\n",
-      "BASE AGENT ANSWER SUMMARIZED:\n",
-      "================================================================================\n",
-      "3M's financial statements for FY2022 show significant capital investments in\n",
-      "property, plant, and equipment (PP&E), with capital expenditures amounting to\n",
-      "$1,831 million and total assets reported at $46,455 million. The company's focus\n",
-      "on growth, productivity, and sustainability is reflected in its projected\n",
-      "capital spending of $1.5 billion to $1.8 billion for 2023, demonstrating a\n",
-      "commitment to supporting business activities and driving future growth through\n",
-      "capital investments and strategic resource management practices\n"
-     ]
-    }
-   ],
-   "source": [
-    "print_solution(base_agent_answer)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In this example, we can see the default answer is not that good. 3M is not a capital intensive business but the agent failed to answer the question correctly."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## How to Add External Knowledge to the Agent"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's incorporate external knowledge to the base agent. We've prepared a sample expert knowledge in `sample-data/expert-knowledge.txt` file, you can load your own knowledge by replacing the sample file with yours.\n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/agent-with-knowledge.png\" height=\"100\" />"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 96,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(file='sample_data/expert-knowledge.txt',\n",
-    "          buffering=-1,\n",
-    "          encoding='utf-8',\n",
-    "          errors='strict',\n",
-    "          newline=None,\n",
-    "          closefd=True,\n",
-    "          opener=None) as f:\n",
-    "    EXPERT_KNOWLEDGE: str = f.read()\n",
-    "\n",
-    "EXPERT_KNOWLEDGE_SET = set(EXPERT_KNOWLEDGE.split('\\n\\n'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In the added knowledge, we've specified \n",
-    "\n",
-    "```\n",
-    "Capital-Intensiveness / Return-on-Capital Metric Formulas\n",
-    "---------------------------------------------------------\n",
-    "\n",
-    "`Capital Intensity Ratio` = `Total Assets` / `(Total) (Net) (Operating) Revenue(s), a.k.a. (Total) (Net) Sales`\n",
-    "\n",
-    "`Return on (Total) Assets, a.k.a. RoA or RoTA` = (\n",
-    "  `Net Income, a.k.a. Net Profit, or Net Earnings (or Loss(es)) (Attributable to Shareholders)` /\n",
-    "  `average Total Assets, typically between two consecutive fiscal year-ends`\n",
-    ")\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's add the knowledge set to our base agent."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 97,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "agent_with_knowledge = Agent(planner=None,\n",
-    "                             reasoner=OodaReasoner(),\n",
-    "                             knowledge=EXPERT_KNOWLEDGE_SET,\n",
-    "                             resources={FileResource(path=DOC_PATH)})\n",
-    "\n",
-    "agent_with_knowledge_solution = agent_with_knowledge.solve(problem=PROBLEM,\n",
-    "                                                           plan=None,\n",
-    "                                                           dynamic=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 109,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PROBLEM: \n",
-      "================================================================================\n",
-      "Is 3M a capital-intensive business based on FY2022 data? \n",
-      "\n",
-      "GROUND TRUTH ANSWER: \n",
-      "================================================================================\n",
-      "\n",
-      "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-      "    which is evident from below key metrics:\n",
-      "    CAPEX/Revenue Ratio: 5.1%\n",
-      "    Fixed assets/Total Assets: 20%\n",
-      "    Return on Assets= 12.4% \n",
-      "\n",
-      "AGENT WITH KNOWLEDGE SOLUTION SUMMARIZED:\n",
-      "================================================================================\n",
-      "Based on the substantial capital expenditures, large asset base, and planned\n",
-      "future investments in operational infrastructure and capacity enhancement, it is\n",
-      "reasonable to classify 3M as a capital-intensive business for FY2022.\n"
-     ]
-    }
-   ],
-   "source": [
-    "print_solution(agent_with_knowledge_solution)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Although the final answer is still incorrect, we can see the reasoning behind is getting better when using external resource - the agent can now recognize `assets`` need to be taken into account when looking at capital intensiveness questions."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Get started with HTP by Adding Auto-Plan on top of Knowledge"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We can see the agent is improved with added knowledge. Let's enhance it with OpenSSA's HTP feature: `AutoHTPlanner`.\n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/auto-htp-agent-with-knowledge.png\" height=\"100\" />"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "`HTP` is OpenSSA’s default problem-solving task plan structure.\n",
-    "\n",
-    "A `HTP` instance is a tree, in which each node can be decomposed into a number of supporting sub-HTPs, each targeting to solve a supporting sub-task.\n",
-    "\n",
-    "`HTP` execution involves using a specified Reasoner to work through sub-tasks from the lowest levels and roll up results up to the top level.\n",
-    "\n",
-    "There is also a horizontal results-sharing mechanism to enable the execution of a subsequent HTP node to benefit from results from earlier nodes at the same depth level."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "`AutoHTPlanner` is OpenSSA’s default Planner to create and update problem-solving HTPs.\n",
-    "\n",
-    "Such a planner has an LM for generating new or updated task HTPs, the complexity of which is controlled by 2 key parameters `max_depth` and `max_subtasks_per_decomp`. \n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/htp.png\" />\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "auto_htp_agent_with_knowledge = Agent(planner=AutoHTPlanner(max_depth=2, max_subtasks_per_decomp=4),\n",
-    "                                      reasoner=OodaReasoner(),\n",
-    "                                      knowledge=EXPERT_KNOWLEDGE_SET,\n",
-    "                                      resources={FileResource(path=DOC_PATH)})\n",
-    "\n",
-    "auto_htp_agent_with_knowledge_solution = auto_htp_agent_with_knowledge.solve(problem=PROBLEM,\n",
-    "                                                                             plan=None,\n",
-    "                                                                             dynamic=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "You can read the full logs of all the intermediate steps in `logs/auto_htp_agent_with_knowledge_logs.txt`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 110,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PROBLEM: \n",
-      "================================================================================\n",
-      "Is 3M a capital-intensive business based on FY2022 data? \n",
-      "\n",
-      "GROUND TRUTH ANSWER: \n",
-      "================================================================================\n",
-      "\n",
-      "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-      "    which is evident from below key metrics:\n",
-      "    CAPEX/Revenue Ratio: 5.1%\n",
-      "    Fixed assets/Total Assets: 20%\n",
-      "    Return on Assets= 12.4% \n",
-      "\n",
-      "AUTO HTP AGENT WITH KNOWLEDGE SOLUTION SUMMARIZED:\n",
-      "================================================================================\n",
-      "Based on the available FY2022 data, 3M's net property, plant, and equipment\n",
-      "(PP&E) constitutes 19.75% of its total assets, indicating that it may not be\n",
-      "highly capital-intensive relative to some industries. However, without\n",
-      "additional information on capital expenditures (CapEx) to sales ratio,\n",
-      "depreciation and amortization expenses, and return on assets (RoA), a definitive\n",
-      "assessment of 3M's capital intensity cannot be made.\n"
-     ]
-    }
-   ],
-   "source": [
-    "print_solution(auto_htp_agent_with_knowledge_solution)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We can see when breaking down the task into other sub-tasks, the agent gives more concrete reasons to answer the question: `key financial metrics such as the\n",
-    "proportion of net fixed assets to total assets, capital expenditure relative to\n",
-    "total net sales, depreciation and amortization expense as a percentage of total\n",
-    "net sales, and Return on Assets cannot be calculated without specific financial\n",
-    "data`. However, the final answer is still incorrect - the agent still fails to answer 3M is not a capital-intensive business."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Let's Upgrade the Agent to Solve the Problem Dynamically"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's enable another `HTP` component: `Dynamic` solving. When a problem is solved dynamically, it would be decomposed further if the sub-tasks are still not solvable.\n",
-    "\n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/dynamic-auto-htp-agent-with-knowledge.png\" height=\"100\" />"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 103,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dynamic_auto_htp_agent_with_knowledge = Agent(planner=AutoHTPlanner(max_depth=2, max_subtasks_per_decomp=4),\n",
-    "                reasoner=OodaReasoner(),\n",
-    "                knowledge=EXPERT_KNOWLEDGE_SET,\n",
-    "                resources={FileResource(path=DOC_PATH)})\n",
-    "\n",
-    "dynamic_auto_htp_agent_with_knowledge_solution = dynamic_auto_htp_agent_with_knowledge.solve(problem=PROBLEM,\n",
-    "                                                                                             plan=None,\n",
-    "                                                                                             dynamic=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 111,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PROBLEM: \n",
-      "================================================================================\n",
-      "Is 3M a capital-intensive business based on FY2022 data? \n",
-      "\n",
-      "GROUND TRUTH ANSWER: \n",
-      "================================================================================\n",
-      "\n",
-      "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-      "    which is evident from below key metrics:\n",
-      "    CAPEX/Revenue Ratio: 5.1%\n",
-      "    Fixed assets/Total Assets: 20%\n",
-      "    Return on Assets= 12.4% \n",
-      "\n",
-      "DYNAMIC AUTO HTP AGENT WITH KNOWLEDGE SOLUTION SUMMARIZED:\n",
-      "================================================================================\n",
-      "Based on the FY2022 data provided, 3M is identified as a capital-intensive\n",
-      "business due to its significant capital expenditures, large total asset base,\n",
-      "focus on environmental expenditures, and structured asset management practices.\n",
-      "These factors collectively indicate a substantial investment in physical assets\n",
-      "and operational capabilities characteristic of capital-intensive businesses.\n"
-     ]
-    }
-   ],
-   "source": [
-    "print_solution(dynamic_auto_htp_agent_with_knowledge_solution)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "With the added knowledge, neither solving statically nor dynamically could help the agent get to the correct final answer. Let's customize the most powerful component of `HTP`: the plan."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Incorporating Expert HTP instead of Auto-HTP"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "With OpenSSA, the user can customize the plan instead of depending on the auto-generated plan. Let's add an expert plan on top of our beginning Base Agent to see how it performs. \n",
-    "\n",
-    "<img src=\"./FinanceBench/diagrams/expert-htp-agent.png\" height=\"100\" />"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We've prepared a sample expert plan, but please feel free to customize the expert plan yourself."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 112,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "variables = {\n",
-    "    'COMPANY': '3M',\n",
-    "    'PERIOD': '2022'\n",
-    "}\n",
-    "\n",
-    "with open('sample_data/expert-plan-templates-sample.yml', 'r') as file:\n",
-    "    EXPERT_PLAN_TEMPLATES_CONTENT = file.read()\n",
-    "EXPERT_PLAN_TEMPLATES_CONTENT = EXPERT_PLAN_TEMPLATES_CONTENT.format(**variables)\n",
-    "EXPERT_PLAN = yaml.safe_load(EXPERT_PLAN_TEMPLATES_CONTENT)\n",
-    "\n",
-    "EXPERT_HTP =  HTP(task=Task.from_dict_or_str(EXPERT_PLAN['task']),\n",
-    "                   sub_plans=[HTP.from_dict(d) for d in EXPERT_PLAN.get('sub-plans', [])])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "expert_htp_agent = Agent(planner=AutoHTPlanner(max_depth=2, max_subtasks_per_decomp=4),\n",
-    "                         reasoner=OodaReasoner(),\n",
-    "                         knowledge=None,\n",
-    "                         resources={FileResource(path=DOC_PATH)})\n",
-    "\n",
-    "expert_htp_agent_solution = expert_htp_agent.solve(problem=PROBLEM,\n",
-    "                                                   plan=EXPERT_HTP,\n",
-    "                                                   dynamic=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "You can read the full logs of all the intermediate steps in `logs/expert_htp_agent_logs.txt`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 114,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "PROBLEM: \n",
-      "================================================================================\n",
-      "Is 3M a capital-intensive business based on FY2022 data? \n",
-      "\n",
-      "GROUND TRUTH ANSWER: \n",
-      "================================================================================\n",
-      "\n",
-      "    No, the company is managing its CAPEX and Fixed Assets pretty efficiently,\n",
-      "    which is evident from below key metrics:\n",
-      "    CAPEX/Revenue Ratio: 5.1%\n",
-      "    Fixed assets/Total Assets: 20%\n",
-      "    Return on Assets= 12.4% \n",
-      "\n",
-      "EXPERT HTP AGENT SOLUTION SUMMARIZED:\n",
-      "================================================================================\n",
-      "Based on the 2022 fiscal period data, although 3M has a significant investment\n",
-      "in Net Property, Plant & Equipment and a substantial asset base relative to its\n",
-      "sales, its Capital Expenditures and Return on Assets metrics do not align with\n",
-      "typical characteristics of a capital-intensive business. Therefore, 3M does not\n",
-      "fully exhibit the characteristics of a capital-intensive business according to\n",
-      "the provided benchmarks.\n"
-     ]
-    }
-   ],
-   "source": [
-    "print_solution(expert_htp_agent_solution)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Yay! By incorporating the expert's plan, we instantly get the correct answer! "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Try It Yourself!"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "So now you've learned how OpenSSA's `HTP` works. You can try different combinations of the knobs that you can turn, including:\n",
-    "- auto-plan vs expert-plan\n",
-    "- statically solving vs dynamically solving\n",
-    "- external knowledge vs no external knowledge"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Some tips and tricks:\n",
-    "- If you want the fastest way to be up and running with HTP with ok-performance: try auto-plan with added knowledge and dynamically solving.\n",
-    "- If you want a sufficiently good result with the least customization and runtime: try adding expert-plan without anything else.\n",
-    "- If you want the best result: try adding expert-plan with knowledge and dynamically solving!\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}