Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ragdaemon7 #578

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/context_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def run_auto_context_benchmark(sample: Sample, config: Config, cwd: Path |
ignore_patterns = [cwd / ".venv"]
annotators = {
"hierarchy": {"ignore_patterns": ignore_patterns},
"chunker_line": {"lines_per_chunk": 100},
"chunker": {},
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change from chunker_line to chunker might affect how the data is processed. Can you confirm if this change was intended to align with updates in the ragdaemon package? It's important to ensure that this doesn't unintentionally alter the benchmarking process.

}
daemon = Daemon(cwd=cwd, annotators=annotators)
await daemon.update()
Expand All @@ -61,7 +61,7 @@ def main(user_samples: list[str], directory: str):
assert dir_path.exists(), f"Invalid directory: {directory}"
print(f"Running benchmarks from {dir_path}")
samples: list[Sample] = []
for root, dirs, files in os.walk(dir_path):
for root, _, files in os.walk(dir_path):
for file in files:
path = Path(root) / file
if file.endswith(".json"):
Expand Down
9 changes: 0 additions & 9 deletions benchmarks/run_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,6 @@ def setup_sample(
)
cwd = Path(repo.working_dir)

# Make sure there's a .gitignore file, and that '.ragdaemon/*' is in it
gitignore_path = cwd / ".gitignore"
if not gitignore_path.exists():
gitignore_path.write_text(".ragdaemon/*\n")
else:
gitignore_contents = gitignore_path.read_text()
if ".ragdaemon/*" not in gitignore_contents:
gitignore_path.write_text(gitignore_contents + ".ragdaemon/*\n")

test_executable = None
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the .gitignore management for .ragdaemon might expose us to potential issues with untracked files getting into version control. Was there a specific reason for removing these lines? If it's about simplifying the setup, we might want to consider other ways to handle this, possibly at a higher level in the project configuration.

if not skip_test_exec and (sample.FAIL_TO_PASS or sample.PASS_TO_PASS):
# If there's an environment_setup_commit, this is what it's needed for.
Expand Down
2 changes: 1 addition & 1 deletion mentat/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.18
1.0.19
10 changes: 2 additions & 8 deletions mentat/code_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from mentat.llm_api_handler import get_max_tokens
from mentat.session_context import SESSION_CONTEXT
from mentat.session_stream import SessionStream
from mentat.utils import get_relative_path, mentat_dir_path
from mentat.utils import get_relative_path


class ContextStreamMessage(TypedDict):
Expand All @@ -35,10 +35,6 @@ class ContextStreamMessage(TypedDict):
total_cost: float


graphs_dir = mentat_dir_path / "ragdaemon"
graphs_dir.mkdir(parents=True, exist_ok=True)


class CodeContext:
daemon: Daemon

Expand Down Expand Up @@ -75,16 +71,14 @@ async def refresh_daemon(self):

annotators: dict[str, dict[str, Any]] = {
"hierarchy": {"ignore_patterns": [str(p) for p in self.ignore_patterns]},
"chunker_line": {"lines_per_chunk": 50},
"chunker": {},
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The removal of graph_path and model from the Daemon initialization could lead to issues if the daemon relies on these parameters for specific functionalities. Please ensure that the daemon's new version is compatible with these changes and that no functionality is lost.

"diff": {"diff": self.diff_context.target},
}
self.daemon = Daemon(
cwd=cwd,
annotators=annotators,
verbose=False,
graph_path=graphs_dir / f"ragdaemon-{cwd.name}.json",
spice_client=llm_api_handler.spice,
model=ctx.config.embedding_model,
provider=ctx.config.embedding_provider,
)
await self.daemon.update()
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ openai==1.13.3
pillow==10.1.0
prompt-toolkit==3.0.39
Pygments==2.15.1
ragdaemon==0.2.12
ragdaemon~=0.7.0
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updating ragdaemon and spiceai packages could introduce breaking changes or deprecations. It's crucial to verify that all functionalities integrated with these packages are still compatible after the updates.

selenium==4.15.2
sentry-sdk==1.34.0
sounddevice==0.4.6
soundfile==0.12.1
spiceai~=0.2.0
spiceai~=0.3.0
termcolor==2.3.0
textual==0.47.1
textual-autocomplete==2.1.0b0
Expand Down
8 changes: 5 additions & 3 deletions tests/code_context_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def func_4(string):
mock_session_context.config.auto_context_tokens = 8000

code_message = await code_context.get_code_message(0, prompt="prompt")
assert mock_session_context.llm_api_handler.spice.count_tokens(code_message, "gpt-4", is_message=True) == 85 # Code
assert mock_session_context.llm_api_handler.spice.count_tokens(code_message, "gpt-4", is_message=True) == 89 # Code
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The expected token count has changed from 85 to 89. It's good to see that tests are updated to reflect changes in the system's behavior, but it would be beneficial to add a comment explaining why this change is expected for future reference.

assert (
code_message
== """\
Expand All @@ -225,13 +225,15 @@ def func_4(string):
3:
4:def func_2():
5: return 3
6:

file_2.py
1:def func_3(a, b, c):
2: return a * b ** c
3:
4:def func_4(string):
5: print(string)
6:
"""
)

Expand All @@ -256,7 +258,7 @@ async def test_get_all_features(temp_testbed, mock_session_context):

# Test without include_files
features = mock_code_context.get_all_features()
assert len(features) == 2
assert len(features) == 6
feature1 = next(f for f in features if f.path == path1)
feature2 = next(f for f in features if f.path == path2)
for _f, _p in zip((feature1, feature2), (path1, path2)):
Expand All @@ -266,7 +268,7 @@ async def test_get_all_features(temp_testbed, mock_session_context):
# Test with include_files argument matching one file
mock_code_context.include(path1)
features = mock_code_context.get_all_features(split_intervals=False)
assert len(features) == 2
assert len(features) == 6
feature1b = next(f for f in features if f.path == path1)
feature2b = next(f for f in features if f.path == path2)
assert feature1b.interval.whole_file()
Expand Down
Loading