Added loaded-model management to prevent overloading the GPU and to keep inference as fast as possible.
parent 11c9f07957
commit 3278581bf7
@@ -206,6 +206,7 @@ if [[ "$1" != "--update" ]]; then
 docker run -d -p 3000:8080 --gpus all \
   -e OPENAI_API_BASE_URL=http://pipelines:9099 \
   -e OPENAI_API_KEY=0p3n-w3bu! \
+  -e OLLAMA_MAX_LOADED_MODELS=1 \
   -v ollama:/root/.ollama \
   -v open-webui:/app/backend/data \
   --name open-webui \
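
With OLLAMA_MAX_LOADED_MODELS=1, Ollama keeps at most one model resident in VRAM and evicts the current one before loading the next, instead of stacking models until the GPU runs out of memory. A quick sanity check (a sketch: it assumes the bundled-Ollama Open WebUI image, so the ollama CLI is available inside the open-webui container named above):

  # Confirm the limit is visible inside the container.
  docker exec open-webui printenv OLLAMA_MAX_LOADED_MODELS

  # List models currently loaded in memory; after requesting a second
  # model, the first should have been evicted rather than kept alongside it.
  docker exec open-webui ollama ps
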
@@ -476,6 +477,7 @@ if [[ "$1" != "--update" ]]; then
   -e ENABLE_BACKENDS=llama-cuda,ollama \
   -e INCLUDE_DEFAULT_MODELS=true \
   -e AUTOLOAD_MODELS=true \
+  -e MODEL_IDLE_TIMEOUT=600 \
   --restart unless-stopped \
   localai/localai:latest-aio-gpu-nvidia-cuda-12
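
MODEL_IDLE_TIMEOUT=600 is intended to unload a model after 10 minutes without requests, releasing VRAM back to the other backends. A rough way to observe it (a sketch: <localai-container> is a placeholder, since the --name for this container is not shown in the context above):

  # Confirm the container received the timeout setting.
  docker exec <localai-container> printenv MODEL_IDLE_TIMEOUT

  # GPU memory use should fall back toward baseline roughly 600 s after
  # the last request, once the idle model has been unloaded.
  watch -n 30 nvidia-smi --query-gpu=memory.used --format=csv
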