-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.gpu.yaml
74 lines (65 loc) · 3.71 KB
/
config.gpu.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# --- Server / runtime ---
uvicorn_workers: 1
use_colors: true
# NOTE(review): debug is switched on in this config; confirm that is intended
# for the deployment this file targets.
debug: true

# --- Security ---
# NOTE(review): 'aaa' is not explained anywhere in this file; presumably an
# authentication layer. Confirm before ever setting this to true.
disable_aaa: false
httpx_verify_ssl: true

# --- Document ingestion ---
# NOTE(review): the unit of the chunk size is not stated in this file.
embedding_chunk_size: 2000
doc_parser_worker_limit: 10
# Vector database backends. Each child key names one backend; commented-out
# keys are optional settings that can be enabled for that backend.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# backend order is kept as-is in case the consumer is order-sensitive.
vectordb:
  # No option is set below, so this key parses as null.
  pgvector:
    # 'connection' overrides the env var 'CCB_DB_URL'

  chroma:
    is_persistent: true
    # chroma_server_host:
    # chroma_server_http_port:
    # chroma_server_ssl_enabled:
    # chroma_server_api_default_path:

  # No option is set below, so this key parses as null.
  weaviate:
    # auth_client_secret:
    # url: http://localhost:8080
# Embedding service settings: where the service is reached (protocol, host,
# port), worker count and timing limits, followed by the model options.
# NOTE(review): nesting restored from a copy that had lost its indentation.
embedding:
  protocol: http
  host: localhost
  port: 5000
  workers: 1
  offload_after_mins: 15  # in minutes
  request_timeout: 1800  # in seconds

  # Options for the llama embedding model.
  llama:
    # 'model_alias' is reserved
    # 'embedding' is always set to True
    model: multilingual-e5-large-instruct-q6_k.gguf
    n_batch: 16
    n_ctx: 8192
    # NOTE(review): -1 presumably means "offload all layers to the GPU"
    # (llama.cpp convention) — confirm.
    n_gpu_layers: -1
# Text-generation backends. Each child key names one backend and holds its
# options. Prompt templates use the placeholders {context} and {question};
# the no_ctx_template variants use {question} only.
# NOTE(review): nesting restored from a copy that had lost its indentation;
# backend order is kept as-is in case the consumer is order-sensitive.
# The templates stay double-quoted on purpose: they rely on "\n" escapes and
# on significant spaces before some of the line breaks.
llm:
  # No option is set, so this key parses as null.
  nc_texttotext:

  llama:
    model_path: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
    n_batch: 512
    n_ctx: 8192
    max_tokens: 4096
    template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to exercise source criticism as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly. Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same language. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
    no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
    end_separator: "<|im_end|>"
    # NOTE(review): -1 presumably means "offload all layers to the GPU"
    # (llama.cpp convention) — confirm.
    n_gpu_layers: -1
    model_kwargs:
      device: cuda

  ctransformer:
    model: dolphin-2.2.1-mistral-7b.Q5_K_M.gguf
    template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant, good at finding relevant context from documents to answer questions provided by the user. <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to exercise source criticism as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly. Detect the language of the question and make sure to use the same language that was used in the question to answer the question. Don't mention which language was used, but just answer the question directly in the same language. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
    no_ctx_template: "<|im_start|> system \nYou're an AI assistant named Nextcloud Assistant.<|im_end|>\n<|im_start|> user\n{question}<|im_end|>\n<|im_start|> assistant\n"
    end_separator: "<|im_end|>"
    # NOTE(review): all four keys below are assumed to belong to 'config';
    # verify against the consumer's schema.
    config:
      context_length: 8192
      max_new_tokens: 4096
      local_files_only: true
      gpu_layers: -1

  hugging_face:
    model_id: gpt2
    task: text-generation
    pipeline_kwargs:
      config:
        max_length: 200
    # NOTE(review): assumed to sit at the backend level, like the 'template'
    # keys of the other backends — confirm.
    template: ""