Added management of loaded models to prevent overloading the GPU and keep things as fast as possible.
parent 11c9f07957
commit 3278581bf7
@@ -206,6 +206,7 @@ if [[ "$1" != "--update" ]]; then
 docker run -d -p 3000:8080 --gpus all \
   -e OPENAI_API_BASE_URL=http://pipelines:9099 \
   -e OPENAI_API_KEY=0p3n-w3bu! \
+  -e OLLAMA_MAX_LOADED_MODELS=1 \
   -v ollama:/root/.ollama \
   -v open-webui:/app/backend/data \
   --name open-webui \
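With OLLAMA_MAX_LOADED_MODELS=1, the bundled Ollama server keeps at most one model resident in GPU memory, evicting the previous model when a different one is requested, so several models can no longer pile up in VRAM at once. A quick way to check that the limit is in effect (this is a sketch assuming the open-webui container bundles the ollama CLI, as the ollama volume above suggests):

  # List models currently loaded in memory; with the limit set to 1,
  # at most one entry should appear even after querying several models.
  docker exec open-webui ollama ps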
@@ -476,6 +477,7 @@ if [[ "$1" != "--update" ]]; then
   -e ENABLE_BACKENDS=llama-cuda,ollama \
   -e INCLUDE_DEFAULT_MODELS=true \
   -e AUTOLOAD_MODELS=true \
+  -e MODEL_IDLE_TIMEOUT=600 \
   --restart unless-stopped \
   localai/localai:latest-aio-gpu-nvidia-cuda-12
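MODEL_IDLE_TIMEOUT=600 is presumably in seconds, so the LocalAI container should unload a model after roughly ten minutes without requests, returning its VRAM to the pool. A rough way to watch this happen from the host (assumes the NVIDIA driver utilities that --gpus all already requires are installed):

  # Poll GPU memory every 30 seconds; usage should drop back down once
  # the idle model is unloaded after the timeout elapses.
  watch -n 30 nvidia-smi --query-gpu=memory.used,memory.total --format=csv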