Added llama.cpp and reworked the code
compose.yaml (18 lines changed)
@@ -7,6 +7,18 @@
     env_file:
       - TelegramBot/.env
 
-  llama-cpp:
-    image: ghcr.io/ggerganov/llama.cpp:server
+  llm-server:
+    image: ghcr.io/ggerganov/llama.cpp:server-cuda
+    container_name: llm-server
+    volumes:
+      - ${MODEL_PATH}:/models
+    ports:
+      - "80:80"
+    command: -m /models/${MODEL_NAME} --port 80 --host 0.0.0.0 -n 512
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
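The ${MODEL_PATH} and ${MODEL_NAME} references above are interpolated by Docker Compose from the shell environment or from a .env file sitting next to compose.yaml (distinct from TelegramBot/.env, which is only passed into the bot container). A minimal sketch of such a file; the values are hypothetical placeholders, not taken from this commit:

# .env next to compose.yaml (hypothetical example values)
MODEL_PATH=/srv/models            # host directory mounted into the container at /models
MODEL_NAME=model-q4_k_m.gguf      # GGUF file the server loads via -m /models/${MODEL_NAME}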
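With the NVIDIA Container Toolkit installed on the host (needed for the deploy.resources GPU reservation to be honored), the server can be smoke-tested once the stack is up. A sketch assuming port 80 is reachable on localhost; /health and /completion are standard endpoints of the llama.cpp HTTP server, and -n 512 in the command above caps the number of tokens generated per request:

# hypothetical smoke test against the running llm-server container
curl -s http://localhost:80/health
curl -s http://localhost:80/completion \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "Hello", "n_predict": 32}'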