Added drop of pending updates on bot start, reset command, AnswerChat method, GPU offload, limit to response length, context reduced to 2048, flash attention, 4 parallel decode queues, --keep of the original 810 tokens (which is the starting prompt)

This commit is contained in:
Samuele Lorefice
2024-12-26 03:24:56 +01:00
parent 296d150282
commit 4167c75279
5 changed files with 49 additions and 20 deletions

2
.env
View File

@@ -1,2 +1,2 @@
MODEL_PATH=./model MODEL_PATH=./model
MODEL_NAME=Qwen2.5-7B-Instruct-Q8_0.gguf MODEL_NAME=Qwen2.5-7B-Instruct-Q8.gguf

View File

@@ -23,9 +23,9 @@ services:
RESHARPER_LOG_CONF: "/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml" RESHARPER_LOG_CONF: "/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml"
image: "telegrambot:dev" image: "telegrambot:dev"
ports: ports:
- "127.0.0.1:57017:57000" - "127.0.0.1:57033:57000"
- "127.0.0.1:57217:57200" - "127.0.0.1:57233:57200"
- "127.0.0.1:57417:57400" - "127.0.0.1:57433:57400"
volumes: volumes:
- "I:\\NemesisAI\\TelegramBot:/app:rw" - "I:\\NemesisAI\\TelegramBot:/app:rw"
- "I:\\NemesisAI:/src:rw" - "I:\\NemesisAI:/src:rw"
@@ -34,5 +34,5 @@ services:
Linux64:/opt/JetBrains/RiderDebuggerTools" Linux64:/opt/JetBrains/RiderDebuggerTools"
- "C:\\Users\\airon\\AppData\\Local\\Programs\\Rider\\bin\\backend-log.xml:/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml" - "C:\\Users\\airon\\AppData\\Local\\Programs\\Rider\\bin\\backend-log.xml:/etc/opt/JetBrains/RiderDebuggerTools/backend-log.xml"
- "C:\\Users\\airon\\AppData\\Local\\JetBrains\\Rider2024.3\\log\\DebuggerWorker\\\ - "C:\\Users\\airon\\AppData\\Local\\JetBrains\\Rider2024.3\\log\\DebuggerWorker\\\
JetBrains.Debugger.Worker.2024_12_26_01_25_50:/var/opt/JetBrains/RiderDebuggerTools:rw" JetBrains.Debugger.Worker.2024_12_26_03_21_12:/var/opt/JetBrains/RiderDebuggerTools:rw"
working_dir: "/app" working_dir: "/app"

View File

@@ -1,4 +1,4 @@
TELEGRAM_BOT_TOKEN=yourTokenHere TELEGRAM_BOT_TOKEN=yourTokenHere
OPENAI_BASE_URL=http://llm-server/ OPENAI_BASE_URL=http://llm-server/
OPENAI_MODEL=Qwen2.5-7B-Instruct-Q8_0.gguf OPENAI_MODEL=Qwen2.5-7B-Instruct-Q8.gguf
OPENAI_API_KEY=MyApiKey OPENAI_API_KEY=MyApiKey

View File

@@ -103,6 +103,7 @@ Console.WriteLine("OpenAI Chat Client created");
using var cts = new CancellationTokenSource(); using var cts = new CancellationTokenSource();
var bot = new TelegramBotClient(token, cancellationToken:cts.Token); var bot = new TelegramBotClient(token, cancellationToken:cts.Token);
await bot.DropPendingUpdates();
var me = bot.GetMe(); var me = bot.GetMe();
bot.OnMessage += OnMessage; bot.OnMessage += OnMessage;
Console.WriteLine("Bot running"); Console.WriteLine("Bot running");
@@ -123,22 +124,43 @@ async Task OnMessage(Message msg, UpdateType type)
Message: {msg.Text} Message: {msg.Text}
"""); """);
var chatid = msg.Chat.Id; var chatid = msg.Chat.Id;
//Check if the chat is already in the dictionary //Check if the message is a reset command
if (!oaiChats.ContainsKey(chatid)) if (msg.Text.StartsWith("/reset")) {
AddChatToDictionary(chatid); ResetChat(chatid);
//Add the current message to the chat await bot.SendMessage(chatid, "Chat context has been reset");
oaiChats[chatid].Add(new UserChatMessage(msg.Text)); return;
//fetch existing messages history }
var messages = oaiChats[chatid]; // Otherwise process it normally
//Fetch the response from the model await AnswerChat(chatid, msg.Text);
var result = chatClient.CompleteChat(messages).Value.Content[0].Text;
//Add the response to the chat
oaiChats[chatid].Add(new AssistantChatMessage(result));
//Send the response to the user
await bot.SendMessage(chatid, result);
} }
} }
// Appends the user's message to the per-chat history, asks the model for a
// completion, records the reply in the history, and sends it back to the chat.
// chatId: Telegram chat identifier used as the history-dictionary key.
// input:  raw user text; clamped to 1024 chars before being sent to the model.
async Task AnswerChat(long chatId, string input) {
    // Lazily create the history for chats we have not seen before.
    if (!oaiChats.ContainsKey(chatId))
        AddChatToDictionary(chatId);
    // Limit the message to 1024 characters to avoid out of context jump
    // (the server context was reduced to 2048 tokens in this commit).
    string text = input.Length > 1024 ? input[..1024] : input;
    // Add the current message to the chat history.
    oaiChats[chatId].Add(new UserChatMessage(text));
    // Fetch the full conversation so the model sees prior turns.
    var messages = oaiChats[chatId];
    // Await the async completion instead of blocking on the synchronous
    // CompleteChat(...).Value inside this async method — blocking here ties
    // up the caller's thread for the whole model generation.
    var completion = await chatClient.CompleteChatAsync(messages);
    var result = completion.Value.Content[0].Text;
    Console.WriteLine("Replying with: " + result);
    // Record the assistant turn so follow-up messages keep full context.
    oaiChats[chatId].Add(new AssistantChatMessage(result));
    // Send the response to the user.
    await bot.SendMessage(chatId, result);
}
void AddChatToDictionary(long id) { void AddChatToDictionary(long id) {
//Create a new chat object //Create a new chat object
var chat = new List<ChatMessage>(); var chat = new List<ChatMessage>();
@@ -146,3 +168,10 @@ void AddChatToDictionary(long id) {
//add the entry to the dictionary //add the entry to the dictionary
oaiChats.Add(id, chat); oaiChats.Add(id, chat);
} }
// Wipes the stored conversation for the given chat and re-seeds it with a
// fresh history (including whatever AddChatToDictionary initializes).
void ResetChat(long chatId) {
    // Remove is a no-op when the key is absent, so this is safe for
    // chats that never talked to the bot before issuing /reset.
    oaiChats.Remove(chatId);
    // Re-register the chat with a brand-new, empty message list.
    AddChatToDictionary(chatId);
}

View File

@@ -14,7 +14,7 @@
- ${MODEL_PATH}:/models - ${MODEL_PATH}:/models
ports: ports:
- "80:80" - "80:80"
command: -m /models/${MODEL_NAME} --port 80 --host 0.0.0.0 -n 512 command: -m /models/${MODEL_NAME} --port 80 --host 0.0.0.0 -n 128 -c 2048 --no-mmap -ngl 50 -fa -np 4 --keep 810
deploy: deploy:
resources: resources:
reservations: reservations: