[BFCL] Package the Codebase #565

Merged 21 commits on Aug 27, 2024
Changes from all commits
25 changes: 22 additions & 3 deletions berkeley-function-call-leaderboard/README.md
@@ -12,15 +12,34 @@ We introduce the Berkeley Function Leaderboard (BFCL), the **first comprehensive
Read more about the technical details and interesting insights in our [blog post](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html)!

![image](./architecture_diagram.png)


### Install Dependencies

```bash
# Create a new Conda environment with Python 3.10
conda create -n BFCL python=3.10

# Activate the new environment
conda activate BFCL
pip install -r requirements.txt # Inside gorilla/berkeley-function-call-leaderboard
pip install vllm==0.5.0 # If you have vLLM supported GPU(s) and want to run our evaluation data against self-hosted OSS models.

# Clone the Gorilla repository
git clone https://github.com/ShishirPatil/gorilla.git

# Change directory to the berkeley-function-call-leaderboard
cd gorilla/berkeley-function-call-leaderboard

# Install the package in editable mode
pip install -e .
```
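As a quick sanity check (a minimal sketch; the `bfcl` package name is taken from the import paths introduced in this PR), confirm that the editable install is importable:

```bash
# Should print the location of the locally installed bfcl package
python -c "import bfcl; print(bfcl.__file__)"
```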

### Installing Extra Dependencies for Self-Hosted Open Source Models

To run LLM generation on self-hosted open-source models, install the extra dependencies with the following command:
```bash
pip install -e .[oss_eval]
```
Note that this requires a GPU supported by vLLM, and vLLM can only be installed on Linux and Windows (not macOS).
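If your shell expands square brackets (zsh, for example), quote the extras specifier so pip receives it intact:

```bash
# Quoted so the shell does not glob-expand the brackets
pip install -e '.[oss_eval]'
```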

## Execution Evaluation Data Post-processing (Can Be Skipped; Only Necessary for Executable Test Categories)
Add your keys to `function_credential_config.json` so that the placeholder values in the questions, params, and answers are replaced with your actual credentials.
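A minimal sketch of applying those credentials (run from `gorilla/berkeley-function-call-leaderboard`; the `./data/` path is an assumption based on the default layout, and `--input-path` accepts either a file or a directory):

```bash
# Replace the placeholder values in the evaluation data with your keys
python apply_function_credential_config.py --input-path ./data/
```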
@@ -189,7 +208,7 @@ In the following two sections, the optional `--test-category` parameter can be u

### Running the Checker

Navigate to the `gorilla/berkeley-function-call-leaderboard/eval_checker` directory and run the `eval_runner.py` script with the desired parameters. The basic syntax is as follows:
Navigate to the `gorilla/berkeley-function-call-leaderboard/bfcl/eval_checker` directory and run the `eval_runner.py` script with the desired parameters. The basic syntax is as follows:

```bash
python eval_runner.py --model MODEL_NAME --test-category TEST_CATEGORY
@@ -2,7 +2,7 @@
import json
import argparse
import os
from eval_checker import custom_exception
from bfcl.eval_checker import custom_exception

parser = argparse.ArgumentParser(description="Replace placeholders in the function credential config file.")
parser.add_argument("--input-path", help="Path to the function credential config file. Can be a file or a directory.")
Empty file.
Empty file.
@@ -1,4 +1,4 @@
from model_handler.constant import (
from bfcl.model_handler.constant import (
UNDERSCORE_TO_DOT,
JAVA_TYPE_CONVERSION,
JS_TYPE_CONVERSION,
@@ -1,7 +1,3 @@
import sys

sys.path.append("../")

from checker import ast_checker, exec_checker, executable_checker_rest
from custom_exception import BadAPIStatusError
from eval_runner_helper import *
@@ -440,10 +436,10 @@ def runner(model_names, test_categories, api_sanity_check):
)


INPUT_PATH = "../result/"
PROMPT_PATH = "../data/"
POSSIBLE_ANSWER_PATH = "../data/possible_answer/"
OUTPUT_PATH = "../score/"
INPUT_PATH = "../../result/"
PROMPT_PATH = "../../data/"
POSSIBLE_ANSWER_PATH = "../../data/possible_answer/"
OUTPUT_PATH = "../../score/"

# A dictionary to store the results
# Key is model name, value is a dictionary with keys as test category and values as a dictionary with accuracy and total count
@@ -6,7 +6,7 @@
import re
import numpy as np
from custom_exception import BadAPIStatusError
from model_handler.handler_map import handler_map
from bfcl.model_handler.handler_map import handler_map
from tqdm import tqdm

REST_API_GROUND_TRUTH_FILE_PATH = "api_status_check_ground_truth_REST.json"
@@ -809,7 +809,7 @@ def api_status_sanity_check_rest():
ground_truth_dummy = load_file(REST_API_GROUND_TRUTH_FILE_PATH)

# Use the ground truth data to make sure the API is working correctly
command = f"cd .. ; python apply_function_credential_config.py --input-path ./eval_checker/{REST_API_GROUND_TRUTH_FILE_PATH};"
command = f"cd ../.. ; python apply_function_credential_config.py --input-path ./bfcl/eval_checker/{REST_API_GROUND_TRUTH_FILE_PATH};"
try:
subprocess.run(command, shell=True, capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as e:
@@ -5,7 +5,7 @@
import time

api_key = {}
with open("../function_credential_config.json") as f:
with open("../../function_credential_config.json") as f:
data = json.loads(f.read())
for item in data:
for k, v in item.items():
@@ -1,6 +1,6 @@
import re
from typing import List, Dict, Union
from model_handler.constant import JAVA_TYPE_CONVERSION
from bfcl.model_handler.constant import JAVA_TYPE_CONVERSION


def java_type_converter(value, expected_type, nested_type=None):
@@ -1,5 +1,5 @@
import re
from model_handler.constant import JS_TYPE_CONVERSION
from bfcl.model_handler.constant import JS_TYPE_CONVERSION


def js_type_converter(value, expected_type, nested_type=None):
Empty file.
@@ -1,5 +1,5 @@
from model_handler.nvidia_handler import NvidiaHandler
from model_handler.utils import ast_parse
from bfcl.model_handler.nvidia_handler import NvidiaHandler
from bfcl.model_handler.utils import ast_parse

class ArcticHandler(NvidiaHandler):
def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
@@ -4,10 +4,10 @@

from anthropic import Anthropic
from anthropic.types import TextBlock, ToolUseBlock
from model_handler.constant import GORILLA_TO_OPENAPI, DEFAULT_SYSTEM_PROMPT
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.constant import GORILLA_TO_OPENAPI, DEFAULT_SYSTEM_PROMPT
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
ast_parse,
convert_to_function_call,
convert_to_tool,
@@ -1,20 +1,20 @@
import os

from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
func_doc_language_specific_pre_processing,
convert_to_tool,
ast_parse,
)
from model_handler.constant import (
from bfcl.model_handler.constant import (
DEFAULT_SYSTEM_PROMPT,
GORILLA_TO_PYTHON,
)
import time
import cohere

from model_handler.constant import USE_COHERE_OPTIMIZATION
from bfcl.model_handler.constant import USE_COHERE_OPTIMIZATION


class CohereHandler(BaseHandler):
@@ -1,12 +1,12 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
func_doc_language_specific_pre_processing,
system_prompt_pre_processing_chat_model,
combine_consecutive_user_prompr,
ast_parse,
)
from model_handler.constant import (
from bfcl.model_handler.constant import (
DEFAULT_SYSTEM_PROMPT,
)
import time
@@ -1,5 +1,5 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import ast_parse
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import ast_parse
import re


@@ -2,10 +2,10 @@
import os
import time

from model_handler.constant import GORILLA_TO_OPENAPI
from model_handler.gpt_handler import OpenAIHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
from bfcl.model_handler.gpt_handler import OpenAIHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
convert_to_tool,
func_doc_language_specific_pre_processing,
)
@@ -1,5 +1,5 @@
from model_handler.gpt_handler import OpenAIHandler
from model_handler.model_style import ModelStyle
from bfcl.model_handler.gpt_handler import OpenAIHandler
from bfcl.model_handler.model_style import ModelStyle
import os, json
from openai import OpenAI

@@ -1,11 +1,11 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
convert_to_tool,
convert_to_function_call,
func_doc_language_specific_pre_processing,
)
from model_handler.constant import GORILLA_TO_OPENAPI
from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
import subprocess, requests, json, time


@@ -1,5 +1,5 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import ast_parse
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import ast_parse
import re


@@ -1,5 +1,5 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import convert_to_function_call
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import convert_to_function_call
import json


@@ -1,5 +1,5 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import convert_to_function_call
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import convert_to_function_call
import json


@@ -1,6 +1,6 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
ast_parse,
system_prompt_pre_processing_chat_model,
func_doc_language_specific_pre_processing,
@@ -1,13 +1,13 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.utils import (
convert_to_tool,
convert_to_function_call,
system_prompt_pre_processing_chat_model,
func_doc_language_specific_pre_processing,
ast_parse,
)
from model_handler.constant import (
from bfcl.model_handler.constant import (
GORILLA_TO_OPENAPI,
DEFAULT_SYSTEM_PROMPT,
)
@@ -1,9 +1,9 @@
import json

from model_handler.model_style import ModelStyle
from model_handler.oss_handler import OSSHandler
from model_handler.constant import GORILLA_TO_OPENAPI
from model_handler.utils import convert_to_tool
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
from bfcl.model_handler.utils import convert_to_tool


class GraniteHandler(OSSHandler):
@@ -1,4 +1,4 @@
from model_handler.model_style import ModelStyle
from bfcl.model_handler.model_style import ModelStyle
import json, os

class BaseHandler:
@@ -1,24 +1,25 @@
from model_handler.arctic_handler import ArcticHandler
from model_handler.claude_handler import ClaudeHandler
from model_handler.cohere_handler import CohereHandler
from model_handler.databricks_handler import DatabricksHandler
from model_handler.deepseek_handler import DeepseekHandler
from model_handler.firework_ai_handler import FireworkAIHandler
from model_handler.functionary_handler import FunctionaryHandler
from model_handler.gemini_handler import GeminiHandler
from model_handler.gemma_handler import GemmaHandler
from model_handler.glaive_handler import GlaiveHandler
from model_handler.gorilla_handler import GorillaHandler
from model_handler.gpt_handler import OpenAIHandler
from model_handler.hermes_handler import HermesHandler
from model_handler.llama_handler import LlamaHandler
from model_handler.mistral_handler import MistralHandler
from model_handler.nexus_handler import NexusHandler
from model_handler.granite_handler import GraniteHandler
from model_handler.nvidia_handler import NvidiaHandler
from model_handler.glm_handler import GLMHandler
from model_handler.yi_handler import YiHandler
from model_handler.xlam_handler import xLAMHandler
from bfcl.model_handler.arctic_handler import ArcticHandler
from bfcl.model_handler.claude_handler import ClaudeHandler
from bfcl.model_handler.cohere_handler import CohereHandler
from bfcl.model_handler.databricks_handler import DatabricksHandler
from bfcl.model_handler.deepseek_handler import DeepseekHandler
from bfcl.model_handler.firework_ai_handler import FireworkAIHandler
from bfcl.model_handler.functionary_handler import FunctionaryHandler
from bfcl.model_handler.gemini_handler import GeminiHandler
from bfcl.model_handler.gemma_handler import GemmaHandler
from bfcl.model_handler.glaive_handler import GlaiveHandler
from bfcl.model_handler.gorilla_handler import GorillaHandler
from bfcl.model_handler.gpt_handler import OpenAIHandler
from bfcl.model_handler.hermes_handler import HermesHandler
from bfcl.model_handler.llama_handler import LlamaHandler
from bfcl.model_handler.mistral_handler import MistralHandler
from bfcl.model_handler.nexus_handler import NexusHandler
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.granite_handler import GraniteHandler
from bfcl.model_handler.nvidia_handler import NvidiaHandler
from bfcl.model_handler.glm_handler import GLMHandler
from bfcl.model_handler.yi_handler import YiHandler
from bfcl.model_handler.xlam_handler import xLAMHandler

handler_map = {
"gorilla-openfunctions-v0": GorillaHandler,
@@ -1,7 +1,7 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import convert_to_tool
from model_handler.constant import GORILLA_TO_OPENAPI
from model_handler.model_style import ModelStyle
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import convert_to_tool
from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
from bfcl.model_handler.model_style import ModelStyle
import json
import inspect

@@ -1,5 +1,5 @@
from model_handler.oss_handler import OSSHandler
from model_handler.utils import ast_parse
from bfcl.model_handler.oss_handler import OSSHandler
from bfcl.model_handler.utils import ast_parse


class LlamaHandler(OSSHandler):
@@ -1,10 +1,10 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.constant import (
from bfcl.model_handler.handler import BaseHandler
from bfcl.model_handler.model_style import ModelStyle
from bfcl.model_handler.constant import (
DEFAULT_SYSTEM_PROMPT,
GORILLA_TO_OPENAPI,
)
from model_handler.utils import (
from bfcl.model_handler.utils import (
convert_to_tool,
ast_parse,
convert_to_function_call,