From 3278581bf744419f30138a1d0c203cf38466aca9 Mon Sep 17 00:00:00 2001
From: crowetic
Date: Tue, 20 May 2025 17:13:59 -0700
Subject: [PATCH] Add loaded-model management to prevent overloading the GPU
 and keep inference as fast as possible.

---
 setup-ai-stack.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup-ai-stack.sh b/setup-ai-stack.sh
index 1026ae0..8a2b04d 100644
--- a/setup-ai-stack.sh
+++ b/setup-ai-stack.sh
@@ -206,6 +206,7 @@ if [[ "$1" != "--update" ]]; then
   docker run -d -p 3000:8080 --gpus all \
     -e OPENAI_API_BASE_URL=http://pipelines:9099 \
     -e OPENAI_API_KEY=0p3n-w3bu! \
+    -e OLLAMA_MAX_LOADED_MODELS=1 \
     -v ollama:/root/.ollama \
     -v open-webui:/app/backend/data \
     --name open-webui \
@@ -476,6 +477,7 @@ if [[ "$1" != "--update" ]]; then
     -e ENABLE_BACKENDS=llama-cuda,ollama \
     -e INCLUDE_DEFAULT_MODELS=true \
     -e AUTOLOAD_MODELS=true \
+    -e MODEL_IDLE_TIMEOUT=600 \
     --restart unless-stopped \
     localai/localai:latest-aio-gpu-nvidia-cuda-12
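
A minimal sketch for verifying the two new limits once the containers restart. It assumes the Open WebUI image bundles the Ollama runtime (suggested by the ollama:/root/.ollama volume mount), that the LocalAI container is named "localai" (its --name flag, if any, is outside this hunk's context, so that name is hypothetical), and that LocalAI honors MODEL_IDLE_TIMEOUT as an idle-unload timeout, which is what this patch intends:

  # Confirm the env vars reached the running containers.
  docker inspect open-webui --format '{{range .Config.Env}}{{println .}}{{end}}' \
    | grep OLLAMA_MAX_LOADED_MODELS   # expect: OLLAMA_MAX_LOADED_MODELS=1
  docker inspect localai --format '{{range .Config.Env}}{{println .}}{{end}}' \
    | grep MODEL_IDLE_TIMEOUT         # expect: MODEL_IDLE_TIMEOUT=600

  # If the image bundles the ollama CLI, list the models currently resident;
  # with OLLAMA_MAX_LOADED_MODELS=1 at most one should appear.
  docker exec open-webui ollama ps

The intent of the two settings: OLLAMA_MAX_LOADED_MODELS=1 caps Ollama at one resident model, so a newly requested model evicts the previous one instead of competing for VRAM, while MODEL_IDLE_TIMEOUT=600 is meant to let LocalAI unload a model after 600 seconds of inactivity.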