Merge branch 'master' into falkordb-langchain
gkorland authored Aug 28, 2023
2 parents c5da9b4 + eb3d1fa commit 464b0eb
Showing 11 changed files with 735 additions and 102 deletions.
6 changes: 3 additions & 3 deletions .github/CONTRIBUTING.md
@@ -44,7 +44,7 @@ If you are adding an issue, please try to keep it focused on a single, modular bug
If two issues are related, or blocking, please link them rather than combining them.

We will try to keep these issues as up to date as possible, though
with the rapid rate of develop in this field some may get out of date.
with the rapid rate of development in this field some may get out of date.
If you notice this happening, please let us know.

### 🙋Getting Help
@@ -87,7 +87,7 @@ This will install all requirements for running the package, examples, linting, f

❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.

Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass. If they don't you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.
Now assuming `make` and `pytest` are installed, you should be able to run the common tasks in the following section. To double check, run `make test` under `libs/langchain`, all tests should pass. If they don't, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.

## ✅ Common Tasks

@@ -134,7 +134,7 @@ We recognize linting can be annoying - if you do not want to do it, please conta
### Spellcheck

Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.
Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.

To check spelling for this project:

563 changes: 526 additions & 37 deletions docs/extras/integrations/llms/llamacpp.ipynb

Large diffs are not rendered by default.

22 changes: 21 additions & 1 deletion libs/experimental/langchain_experimental/sql/base.py
@@ -29,6 +29,15 @@ class SQLDatabaseChain(Chain):
from langchain import OpenAI, SQLDatabase
db = SQLDatabase(...)
db_chain = SQLDatabaseChain.from_llm(OpenAI(), db)
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include the permissions this chain needs.
Failure to do so may result in data corruption or loss, since this chain may
attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this chain.
This issue shows an example negative outcome if these steps are not taken:
https://github.com/langchain-ai/langchain/issues/5923
"""

llm_chain: LLMChain
@@ -49,7 +58,7 @@ class SQLDatabaseChain(Chain):
return_direct: bool = False
"""Whether or not to return the result of querying the SQL table directly."""
use_query_checker: bool = False
"""Whether or not the query checker tool should be used to attempt
"""Whether or not the query checker tool should be used to attempt
to fix the initial SQL from the LLM."""
query_checker_prompt: Optional[BasePromptTemplate] = None
"""The prompt template that should be used by the query checker"""
@@ -197,6 +206,17 @@ def from_llm(
prompt: Optional[BasePromptTemplate] = None,
**kwargs: Any,
) -> SQLDatabaseChain:
"""Create a SQLDatabaseChain from an LLM and a database connection.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include the permissions this chain needs.
Failure to do so may result in data corruption or loss, since this chain may
attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this chain.
This issue shows an example negative outcome if these steps are not taken:
https://github.com/langchain-ai/langchain/issues/5923
"""
prompt = prompt or SQL_PROMPTS.get(db.dialect, PROMPT)
llm_chain = LLMChain(llm=llm, prompt=prompt)
return cls(llm_chain=llm_chain, database=db, **kwargs)
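To make the security note above concrete, here is a minimal sketch of wiring the chain to narrowly-scoped credentials. The connection URI and the `readonly_user` role are hypothetical and assume a role pre-created with SELECT-only grants:

```python
from langchain import OpenAI, SQLDatabase
from langchain_experimental.sql.base import SQLDatabaseChain

# Hypothetical read-only role, granted only SELECT on the tables the chain needs.
db = SQLDatabase.from_uri("postgresql://readonly_user:secret@localhost:5432/appdb")
db_chain = SQLDatabaseChain.from_llm(OpenAI(temperature=0), db)
```

With grants limited this way, a prompted `DROP TABLE` or `INSERT` fails at the database layer instead of relying on the model to behave.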
20 changes: 12 additions & 8 deletions libs/langchain/langchain/callbacks/tracers/evaluation.py
@@ -3,10 +3,11 @@

import logging
from concurrent.futures import Future, ThreadPoolExecutor, wait
from typing import Any, List, Optional, Sequence, Set, Union
from typing import Any, Dict, List, Optional, Sequence, Set, Union
from uuid import UUID

from langsmith import Client, RunEvaluator
import langsmith
from langsmith import schemas as langsmith_schemas

from langchain.callbacks.manager import tracing_v2_enabled
from langchain.callbacks.tracers.base import BaseTracer
@@ -62,13 +63,13 @@ class EvaluatorCallbackHandler(BaseTracer):
The LangSmith project name to organize eval chain runs under.
"""

name: str = "evaluator_callback_handler"
name = "evaluator_callback_handler"

def __init__(
self,
evaluators: Sequence[RunEvaluator],
evaluators: Sequence[langsmith.RunEvaluator],
max_workers: Optional[int] = None,
client: Optional[Client] = None,
client: Optional[langsmith.Client] = None,
example_id: Optional[Union[UUID, str]] = None,
skip_unfinished: bool = True,
project_name: Optional[str] = "evaluators",
@@ -86,10 +87,11 @@ def __init__(
self.futures: Set[Future] = set()
self.skip_unfinished = skip_unfinished
self.project_name = project_name
self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {}
global _TRACERS
_TRACERS.append(self)

def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None:
"""Evaluate the run in the project.
Parameters
@@ -102,18 +104,20 @@ def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
"""
try:
if self.project_name is None:
self.client.evaluate_run(run, evaluator)
feedback = self.client.evaluate_run(run, evaluator)
with tracing_v2_enabled(
project_name=self.project_name, tags=["eval"], client=self.client
):
self.client.evaluate_run(run, evaluator)
feedback = self.client.evaluate_run(run, evaluator)
except Exception as e:
logger.error(
f"Error evaluating run {run.id} with "
f"{evaluator.__class__.__name__}: {e}",
exc_info=True,
)
raise e
example_id = str(run.reference_example_id)
self.logged_feedback.setdefault(example_id, []).append(feedback)

def _persist_run(self, run: Run) -> None:
"""Run the evaluator on the run.
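A sketch of how the new `logged_feedback` mapping might be consumed once runs have been evaluated; `MyRunEvaluator` and the chain being traced are hypothetical stand-ins:

```python
from langsmith import Client

from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler

# MyRunEvaluator is a hypothetical langsmith.RunEvaluator implementation.
handler = EvaluatorCallbackHandler(evaluators=[MyRunEvaluator()], client=Client())
chain.run("some input", callbacks=[handler])  # `chain` is any traced chain
handler.wait_for_futures()  # block until queued evaluations finish

# Feedback is grouped by the reference example id of each evaluated run.
for example_id, feedback in handler.logged_feedback.items():
    print(example_id, [f.key for f in feedback])
```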
29 changes: 29 additions & 0 deletions libs/langchain/langchain/llms/grammars/json.gbnf
@@ -0,0 +1,29 @@
# Grammar for subset of JSON - doesn't support full string or number syntax

root ::= object
value ::= object | array | string | number | boolean | "null"

object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}"

array ::=
"[" ws (
value
("," ws value)*
)? "]"

string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws

# Only plain integers currently
number ::= "-"? [0-9]+ ws
boolean ::= ("true" | "false") ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
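This grammar can be exercised through the `grammar_path` parameter that this commit adds to `LlamaCpp` (see the llamacpp.py diff below); the model path is a hypothetical local file:

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="/models/llama-2-7b.gguf",  # hypothetical local model file
    grammar_path="libs/langchain/langchain/llms/grammars/json.gbnf",
)
# Output is constrained to the JSON subset defined by the grammar above.
print(llm("Describe Alice, a 30-year-old engineer, in JSON:"))
```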
14 changes: 14 additions & 0 deletions libs/langchain/langchain/llms/grammars/list.gbnf
@@ -0,0 +1,14 @@
root ::= "[" items "]" EOF

items ::= item ("," ws* item)*

item ::= string

string ::=
"\"" word (ws+ word)* "\"" ws*

word ::= [a-zA-Z]+

ws ::= " "

EOF ::= "\n"
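Alternatively, a grammar can be passed inline as a string via the new `grammar` parameter, which `validate_environment` converts with `LlamaGrammar.from_string` (shown in the llamacpp.py diff below); paths remain hypothetical:

```python
from langchain.llms import LlamaCpp

# Read the list grammar above and pass it as a plain string.
list_grammar = open("libs/langchain/langchain/llms/grammars/list.gbnf").read()
llm = LlamaCpp(
    model_path="/models/llama-2-7b.gguf",  # hypothetical local model file
    grammar=list_grammar,
)
print(llm("List three colors:"))  # e.g. ["red", "green", "blue"]
```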
62 changes: 51 additions & 11 deletions libs/langchain/langchain/llms/llamacpp.py
@@ -1,5 +1,8 @@
from __future__ import annotations

import logging
from typing import Any, Dict, Iterator, List, Optional
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
@@ -8,6 +11,9 @@
from langchain.utils import get_pydantic_field_names
from langchain.utils.utils import build_extra_kwargs

if TYPE_CHECKING:
from llama_cpp import LlamaGrammar

logger = logging.getLogger(__name__)


@@ -113,12 +119,35 @@ class LlamaCpp(LLM):
streaming: bool = True
"""Whether to stream the results, token by token."""

grammar_path: Optional[Union[str, Path]] = None
"""
grammar_path: Path to the .gbnf file that defines formal grammars
for constraining model outputs. For instance, the grammar can be used
to force the model to generate valid JSON or to speak exclusively in emojis. At most
one of grammar_path and grammar should be passed in.
"""
grammar: Optional[Union[str, LlamaGrammar]] = None
"""
grammar: formal grammar for constraining model outputs. For instance, the grammar
can be used to force the model to generate valid JSON or to speak exclusively in
emojis. At most one of grammar_path and grammar should be passed in.
"""

verbose: bool = True
"""Print verbose output to stderr."""

@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that llama-cpp-python library is installed."""
try:
from llama_cpp import Llama, LlamaGrammar
except ImportError:
raise ImportError(
"Could not import llama-cpp-python library. "
"Please install the llama-cpp-python library to "
"use this embedding model: pip install llama-cpp-python"
)

model_path = values["model_path"]
model_param_names = [
"rope_freq_scale",
@@ -146,21 +175,26 @@ def validate_environment(cls, values: Dict) -> Dict:
model_params.update(values["model_kwargs"])

try:
from llama_cpp import Llama

values["client"] = Llama(model_path, **model_params)
except ImportError:
raise ImportError(
"Could not import llama-cpp-python library. "
"Please install the llama-cpp-python library to "
"use this embedding model: pip install llama-cpp-python"
)
except Exception as e:
raise ValueError(
f"Could not load Llama model from path: {model_path}. "
f"Received error {e}"
)

if values["grammar"] and values["grammar_path"]:
grammar = values["grammar"]
grammar_path = values["grammar_path"]
raise ValueError(
"Can only pass in one of grammar and grammar_path. Received "
f"{grammar=} and {grammar_path=}."
)
elif isinstance(values["grammar"], str):
values["grammar"] = LlamaGrammar.from_string(values["grammar"])
elif values["grammar_path"]:
values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
else:
pass
return values
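For reference, a sketch of the mutual-exclusion check above from the caller's side; the model path is hypothetical, and the model must load successfully before this check is reached:

```python
from langchain.llms import LlamaCpp

# Assuming the model file loads, passing both parameters raises:
# ValueError: Can only pass in one of grammar and grammar_path. ...
llm = LlamaCpp(
    model_path="/models/llama-2-7b.gguf",  # hypothetical local model file
    grammar='root ::= "yes" | "no"',
    grammar_path="libs/langchain/langchain/llms/grammars/json.gbnf",
)
```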

@root_validator(pre=True)
@@ -176,7 +210,7 @@ def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling llama_cpp."""
return {
params = {
"suffix": self.suffix,
"max_tokens": self.max_tokens,
"temperature": self.temperature,
@@ -187,6 +221,9 @@ def _default_params(self) -> Dict[str, Any]:
"repeat_penalty": self.repeat_penalty,
"top_k": self.top_k,
}
if self.grammar:
params["grammar"] = self.grammar
return params

@property
def _identifying_params(self) -> Dict[str, Any]:
@@ -252,7 +289,10 @@ def _call(
# and return the combined strings from the first choices's text:
combined_text_output = ""
for chunk in self._stream(
prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
prompt=prompt,
stop=stop,
run_manager=run_manager,
**kwargs,
):
combined_text_output += chunk.text
return combined_text_output

