You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Exception: Requested tokens (76) exceed context window of 32
Traceback (most recent call last):
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\server\errors.py", line 171, in custom_route_handler
response = await original_route_handler(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\routing.py", line 301, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\server\app.py", line 513, in create_chat_completion
] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\concurrency.py", line 39, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\anyio\_backends\_asyncio.py", line 2405, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\anyio\_backends\_asyncio.py", line 914, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\llama.py", line 1898, in create_chat_completion
return handler(
^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\llama_chat_format.py", line 637, in chat_completion_handler
completion_or_chunks = llama.create_completion(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\llama.py", line 1732, in create_completion
completion: Completion = next(completion_or_chunks) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\llama.py", line 1169, in _create_completion
raise ValueError(
ValueError: Requested tokens (76) exceed context window of 32
INFO: 127.0.0.1:59463 - "POST /v1/chat/completions HTTP/1.1" 400 Bad Request
If I force quit, it crashes my terminal.
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "C:\Program Files\Python311\Lib\asyncio\runners.py", line 190, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\asyncio\runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\asyncio\base_events.py", line 641, in run_until_complete
self.run_forever()
File "C:\Program Files\Python311\Lib\asyncio\windows_events.py", line 321, in run_forever
super().run_forever()
File "C:\Program Files\Python311\Lib\asyncio\base_events.py", line 608, in run_forever
self._run_once()
File "C:\Program Files\Python311\Lib\asyncio\base_events.py", line 1936, in _run_once
handle._run()
File "C:\Program Files\Python311\Lib\asyncio\events.py", line 84, in _run
self._context.run(self._callback, *self._args)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\uvicorn\server.py", line 68, in serve
with self.capture_signals():
File "C:\Program Files\Python311\Lib\contextlib.py", line 144, in __exit__
next(self.gen)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\uvicorn\server.py", line 328, in capture_signals
signal.raise_signal(captured_signal)
File "C:\Program Files\Python311\Lib\asyncio\runners.py", line 157, in _on_sigint
raise KeyboardInterrupt()
KeyboardInterrupt
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 406, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 70, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\applications.py", line 113, in __call__
await self.middleware_stack(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\middleware\errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\middleware\cors.py", line 85, in __call__
await self.app(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette_context\middleware\raw_middleware.py", line 92, in __call__
await self.app(scope, receive, send_wrapper)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\middleware\exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\_exception_handler.py", line 51, in wrapped_app
await app(scope, receive, sender)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\routing.py", line 735, in app
await route.handle(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\routing.py", line 288, in handle
await self.app(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\_exception_handler.py", line 51, in wrapped_app
await app(scope, receive, sender)
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\routing.py", line 73, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\llama_cpp\server\errors.py", line 171, in custom_route_handler
response = await original_route_handler(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\routing.py", line 291, in app
solved_result = await solve_dependencies(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\dependencies\utils.py", line 624, in solve_dependencies
solved = await solve_generator(
^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\dependencies\utils.py", line 550, in solve_generator
return await stack.enter_async_context(cm)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\contextlib.py", line 650, in enter_async_context
result = await _enter(cm)
^^^^^^^^^^^^^^^^
File "C:\Program Files\Python311\Lib\contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\fastapi\concurrency.py", line 27, in contextmanager_in_threadpool
yield await run_in_threadpool(cm.__enter__)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\starlette\concurrency.py", line 39, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\carlo\workspace\genAI\carlos-ai\.venv-ai\Lib\site-packages\anyio\_backends\_asyncio.py", line 2405, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
asyncio.exceptions.CancelledError
The text was updated successfully, but these errors were encountered:
carlostomazin
changed the title
Server crash with exceed context >= v0.2.81
Server crash with exceed context | lib version >= v0.2.81
Sep 26, 2024
Prerequisites
Please answer the following questions for yourself before submitting an issue.
Expected Behavior
I'm trying to send a prompt with a large context.
I expect the server to return an 'exceed context' error and remain active.
Current Behavior
The server crashed with an 'exceed context' error and no longer responds to other requests.
Environment and Context
Windows 11
CUDA 12.6
Python 3.11.9
Failure Information (for bugs)
Steps to Reproduce
Failure Logs
The expected error was logged, but the server no longer responds to other requests.
If I force quit, it crashes my terminal.
The text was updated successfully, but these errors were encountered: