Added drop of pending updates on bot start, reset command, AnswerChat method, GPU offload, limit to response length, context reduced to 2048, flash attention, 4 parallel decode queues, --keep of the original 810 tokens (which is the starting prompt)

This commit is contained in:
Samuele Lorefice
2024-12-26 03:24:56 +01:00
parent 296d150282
commit 4167c75279
5 changed files with 49 additions and 20 deletions

2
.env
View File

@@ -1,2 +1,2 @@
MODEL_PATH=./model MODEL_PATH=./model
MODEL_NAME=Qwen2.5-7B-Instruct-Q8_0.gguf MODEL_NAME=Qwen2.5-7B-Instruct-Q8.gguf

View File

@@ -23,9 +23,9 @@ services:
RESHARPER_LOG_CONF: "/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml" RESHARPER_LOG_CONF: "/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml"
image: "telegrambot:dev" image: "telegrambot:dev"
ports: ports:
- "127.0.0.1:57017:57000" - "127.0.0.1:57033:57000"
- "127.0.0.1:57217:57200" - "127.0.0.1:57233:57200"
- "127.0.0.1:57417:57400" - "127.0.0.1:57433:57400"
volumes: volumes:
- "I:\\NemesisAI\\TelegramBot:/app:rw" - "I:\\NemesisAI\\TelegramBot:/app:rw"
- "I:\\NemesisAI:/src:rw" - "I:\\NemesisAI:/src:rw"
@@ -34,5 +34,5 @@ services:
Linux64:/opt/JetBrains/RiderDebuggerTools" Linux64:/opt/JetBrains/RiderDebuggerTools"
- "C:\\Users\\airon\\AppData\\Local\\Programs\\Rider\\bin\\backend-log.xml:/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml" - "C:\\Users\\airon\\AppData\\Local\\Programs\\Rider\\bin\\backend-log.xml:/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml"
- "C:\\Users\\airon\\AppData\\Local\\JetBrains\\Rider2024.3\\log\\DebuggerWorker\\\ - "C:\\Users\\airon\\AppData\\Local\\JetBrains\\Rider2024.3\\log\\DebuggerWorker\\\
JetBrains.Debugger.Worker.2024_12_26_01_25_50:/var/opt/JetBrains/RiderDebuggerTools:rw" JetBrains.Debugger.Worker.2024_12_26_03_21_12:/var/opt/JetBrains/RiderDebuggerTools:rw"
working_dir: "/app" working_dir: "/app"

View File

@@ -1,4 +1,4 @@
TELEGRAM_BOT_TOKEN=yourTokenHere TELEGRAM_BOT_TOKEN=yourTokenHere
OPENAI_BASE_URL=http://llm-server/ OPENAI_BASE_URL=http://llm-server/
OPENAI_MODEL=Qwen2.5-7B-Instruct-Q8_0.gguf OPENAI_MODEL=Qwen2.5-7B-Instruct-Q8.gguf
OPENAI_API_KEY=MyApiKey OPENAI_API_KEY=MyApiKey

View File

@@ -103,6 +103,7 @@ Console.WriteLine("OpenAI Chat Client created");
using var cts = new CancellationTokenSource(); using var cts = new CancellationTokenSource();
var bot = new TelegramBotClient(token, cancellationToken:cts.Token); var bot = new TelegramBotClient(token, cancellationToken:cts.Token);
await bot.DropPendingUpdates();
var me = bot.GetMe(); var me = bot.GetMe();
bot.OnMessage += OnMessage; bot.OnMessage += OnMessage;
Console.WriteLine("Bot running"); Console.WriteLine("Bot running");
@@ -123,22 +124,43 @@ async Task OnMessage(Message msg, UpdateType type)
Message: {msg.Text} Message: {msg.Text}
"""); """);
var chatid = msg.Chat.Id; var chatid = msg.Chat.Id;
//Check if the chat is already in the dictionary //Check if the message is a reset command
if (!oaiChats.ContainsKey(chatid)) if (msg.Text.StartsWith("/reset")) {
AddChatToDictionary(chatid); ResetChat(chatid);
//Add the current message to the chat await bot.SendMessage(chatid, "Chat context has been reset");
oaiChats[chatid].Add(new UserChatMessage(msg.Text)); return;
//fetch existing messages history }
var messages = oaiChats[chatid]; // Otherwise process it normally
//Fetch the response from the model await AnswerChat(chatid, msg.Text);
var result = chatClient.CompleteChat(messages).Value.Content[0].Text;
//Add the response to the chat
oaiChats[chatid].Add(new AssistantChatMessage(result));
//Send the response to the user
await bot.SendMessage(chatid, result);
} }
} }
// Appends the user's message to the per-chat history, asks the model for a
// completion, records the reply in the history, and sends it back to the chat.
// chatId: Telegram chat identifier used as the history-dictionary key.
// input:  raw user text; clamped to 1024 chars before being sent to the model.
async Task AnswerChat(long chatId, string input) {
    // Lazily create the history for chats we have not seen before.
    if (!oaiChats.ContainsKey(chatId))
        AddChatToDictionary(chatId);
    // Limit the message to 1024 characters to avoid out of context jump
    // (the server context was reduced to 2048 tokens in this commit).
    string text = input.Length > 1024 ? input[..1024] : input;
    // Add the current message to the chat history.
    oaiChats[chatId].Add(new UserChatMessage(text));
    // Fetch the full conversation so the model sees prior turns.
    var messages = oaiChats[chatId];
    // Await the async completion instead of blocking on the synchronous
    // CompleteChat(...).Value inside this async method — blocking here ties
    // up the caller's thread for the whole model generation.
    var completion = await chatClient.CompleteChatAsync(messages);
    var result = completion.Value.Content[0].Text;
    Console.WriteLine("Replying with: " + result);
    // Record the assistant turn so follow-up messages keep full context.
    oaiChats[chatId].Add(new AssistantChatMessage(result));
    // Send the response to the user.
    await bot.SendMessage(chatId, result);
}
void AddChatToDictionary(long id) { void AddChatToDictionary(long id) {
//Create a new chat object //Create a new chat object
var chat = new List<ChatMessage>(); var chat = new List<ChatMessage>();
@@ -146,3 +168,10 @@ void AddChatToDictionary(long id) {
//add the entry to the dictionary //add the entry to the dictionary
oaiChats.Add(id, chat); oaiChats.Add(id, chat);
} }
// Wipes the stored conversation for the given chat and re-seeds it with a
// fresh history (including whatever AddChatToDictionary initializes).
void ResetChat(long chatId) {
    // Remove is a no-op when the key is absent, so this is safe for
    // chats that never talked to the bot before issuing /reset.
    oaiChats.Remove(chatId);
    // Re-register the chat with a brand-new, empty message list.
    AddChatToDictionary(chatId);
}

View File

@@ -14,7 +14,7 @@
- ${MODEL_PATH}:/models - ${MODEL_PATH}:/models
ports: ports:
- "80:80" - "80:80"
command: -m /models/${MODEL_NAME} --port 80 --host 0.0.0.0 -n 512 command: -m /models/${MODEL_NAME} --port 80 --host 0.0.0.0 -n 128 -c 2048 --no-mmap -ngl 50 -fa -np 4 --keep 810
deploy: deploy:
resources: resources:
reservations: reservations: