Skip to content

Commit

Permalink
Merge pull request #170 from aurelio-labs/james/semantic-splits
Browse files Browse the repository at this point in the history
add set threshold
  • Loading branch information
jamescalam authored Feb 23, 2024
2 parents 599be55 + 2fa2b00 commit 4feb431
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "semantic-router"
-version = "0.0.23"
+version = "0.0.24"
description = "Super fast semantic router for AI decision making"
authors = [
"James Briggs <[email protected]>",
Expand Down
2 changes: 1 addition & 1 deletion semantic_router/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

__all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"]

-__version__ = "0.0.23"
+__version__ = "0.0.24"
7 changes: 6 additions & 1 deletion semantic_router/splitters/rolling_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def __init__(
self,
encoder: BaseEncoder,
threshold_adjustment=0.01,
+dynamic_threshold: bool = True,
window_size=5,
min_split_tokens=100,
max_split_tokens=300,
Expand All @@ -25,6 +26,7 @@ def __init__(
self.calculated_threshold: float
self.encoder = encoder
self.threshold_adjustment = threshold_adjustment
+self.dynamic_threshold = dynamic_threshold
self.window_size = window_size
self.plot_splits = plot_splits
self.min_split_tokens = min_split_tokens
Expand Down Expand Up @@ -321,7 +323,10 @@ def __call__(self, docs: List[str]) -> List[DocumentSplit]:
)
docs = split_to_sentences(docs[0])
encoded_docs = self.encode_documents(docs)
-self.find_optimal_threshold(docs, encoded_docs)
+if self.dynamic_threshold:
+    self.find_optimal_threshold(docs, encoded_docs)
+else:
+    self.calculated_threshold = self.encoder.score_threshold
similarities = self.calculate_similarity_scores(encoded_docs)
split_indices = self.find_split_indices(similarities=similarities)
splits = self.split_documents(docs, split_indices, similarities)
Expand Down

0 comments on commit 4feb431

Please sign in to comment.