Added a new localai:custom Docker container image, built from a customized version of LocalAI that supports its memory addition 'localrecall' and its 'localagi', along with GPU support. All of this will be rewritten to use docker-compose in the future.

2025-05-19 16:43:22 -07:00 · 2025-05-19 16:43:22 -07:00 · e80f6c995a
commit e80f6c995a
parent 13028b8d2b
2 changed files with 167 additions and 22 deletions
--- a/27
+++ b/27
@ -0,0 +1,27 @@
 # Runtime image for a locally built LocalAI binary on top of the CUDA 12.2 runtime.
 # Expects a prebuilt `local-ai` executable in the root of the build context.
 FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
 # Build-time only: keeps apt non-interactive without leaking into the runtime env.
 ARG DEBIAN_FRONTEND=noninteractive
 WORKDIR /app
 RUN apt-get update && apt-get install -y --no-install-recommends \
       ca-certificates \
       curl \
       ffmpeg \
       libgl1 \
       libgomp1 \
       python3 \
     && rm -rf /var/lib/apt/lists/*
 COPY local-ai /usr/local/bin/local-ai
 # Runtime defaults; each of these can be overridden with `docker run -e`.
 ENV MODEL_PATH=/models \
     CONFIG_PATH=/config \
     ENABLE_BACKENDS=llama-cuda,whispercpp,stablediffusion,ollama \
     AUTOLOAD_MODELS=true \
     INCLUDE_DEFAULT_MODELS=true
 # Declared after all build-time writes so nothing written during the build is discarded.
 VOLUME ["/models", "/config"]
 EXPOSE 8080
 # curl is installed above solely so this probe can run inside the container.
 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
   CMD curl -fsS http://localhost:8080/readyz || exit 1
 # NOTE(review): no USER is set, so the server runs as root. Consider a dedicated
 # user once ownership of the bind-mounted /models and /config is settled.
 CMD ["/usr/local/bin/local-ai"]
--- a/setup-ai-stack.sh
+++ b/setup-ai-stack.sh
@ -44,30 +44,55 @@ if [[ "$1" == "--update" ]]; then
    echo "⚠️  JupyterLab is not running, skipping update."
  fi
 # --- LocalAI custom build and update ---
 # Rebuilds LocalAI from source and packages the result as the localai:custom image.
 # LOCALAI_REPO is the local checkout used for the source build.
 LOCALAI_REPO=~/git-repos/LocalAI
 # NOTE(review): this looks like a placeholder URL — the curl download further down
 # will fail until it points at the real Dockerfile; confirm before running.
 DOCKERFILE_URL="https://your-domain.com/path/to/Dockerfile"
-  # 🔁 Updating LocalAI (GPU build)
+echo "🔁 Rebuilding LocalAI (custom)..."
 # Stop and remove a running container so it can be recreated from the new image.
  if check_container_running "localai"; then
    echo "🧠 Updating LocalAI..."
    docker stop localai
    docker rm localai
  else
    echo "➕ Installing LocalAI..."
  fi
-  docker pull localai/localai:latest-aio-gpu-nvidia-cuda-12
+# Backup and remove existing repo
 if [ -d "$LOCALAI_REPO" ]; then
  echo "📦 Backing up existing LocalAI repo..."
  # Rename rather than delete; the suffix is the current Unix timestamp.
  mv "$LOCALAI_REPO" "${LOCALAI_REPO}_backup_$(date +%s)"
 fi
-  # Ensure default models.yaml exists
+# Clone latest LocalAI
-  MODELS_YAML=~/ai-stack/localai/config/models.yaml
+git clone https://github.com/mudler/LocalAI.git "$LOCALAI_REPO"
 # NOTE(review): the MODELS_YAML assignment is on a removed line above, yet the
 # variable is still read here — confirm it is defined elsewhere; otherwise this
 # test and the redirect below operate on an empty path.
  if [ ! -f "$MODELS_YAML" ]; then
    echo "📄 Creating default models.yaml..."
 cat > "$MODELS_YAML" <<EOF
 - name: phi
  backend: llama-cuda
  parameters:
    model: /models/phi-2.gguf
 EOF
  fi
 # Remove default Docker-related files
 # (the Dockerfile is replaced by the download further down).
 # NOTE(review): cd is unchecked; if it fails, the rm -f and make steps run in
 # whatever directory the script was started from.
 cd "$LOCALAI_REPO"
 rm -f Dockerfile Dockerfile.aio docker-compose.yaml
 # Prepare and build LocalAI binary
 # NOTE: this step needed several custom additions to build correctly. On a machine
 # that has not built LocalAI before, it is probably better to wait for the
 # docker-compose based setup, which is coming soon.
 echo "🔧 Running make prepare..."
 make prepare
 echo "🧠 Building LocalAI binary with GPU, localrecall, and localagi..."
 make build \
  TAGS="localrecall,localagi,gpu" \
  BACKENDS="llama-cuda whispercpp stablediffusion ollama" \
  GPU_SUPPORT="cuda"
 # Download your custom Dockerfile
 echo "📥 Downloading custom Dockerfile..."
 curl -fsSL "$DOCKERFILE_URL" -o Dockerfile
 # Build the container
 echo "🔨 Building custom LocalAI Docker image..."
 docker build -t localai:custom .
 # Remove existing container
 # (errors are silenced: the container may already have been removed above).
 docker stop localai 2>/dev/null || true
 docker rm localai 2>/dev/null || true
 # Create model and config directories if needed
 mkdir -p ~/ai-stack/localai/models
 mkdir -p ~/ai-stack/localai/config
 # Run new container
 docker run -d \
  --name localai \
  --gpus all \
@ -79,9 +104,48 @@ docker run -d \
  -e AUTOLOAD_MODELS=true \
  -e MODEL_PATH=/models \
  --restart unless-stopped \
-  localai/localai:latest-aio-gpu-nvidia-cuda-12
+  localai:custom
-echo "✅ LocalAI running at http://localhost:8080"
+echo "✅ LocalAI (custom) is now running at http://localhost:8080"
 #   # 🔁 Updating LocalAI (GPU build)
 #   if check_container_running "localai"; then
 #     echo "🧠 Updating LocalAI..."
 #     docker stop localai
 #     docker rm localai
 #   else
 #     echo "➕ Installing LocalAI..."
 #   fi
 #   docker pull localai/localai:latest-aio-gpu-nvidia-cuda-12
 #   # Ensure default models.yaml exists
 #   MODELS_YAML=~/ai-stack/localai/config/models.yaml
 #   if [ ! -f "$MODELS_YAML" ]; then
 #     echo "📄 Creating default models.yaml..."
 # cat > "$MODELS_YAML" <<EOF
 # - name: phi
 #   backend: llama-cuda
 #   parameters:
 #     model: /models/phi-2.gguf
 # EOF
 #   fi
 # docker run -d \
 #   --name localai \
 #   --gpus all \
 #   -p 8080:8080 \
 #   -v ~/ai-stack/localai/models:/models \
 #   -v ~/ai-stack/localai/config:/config \
 #   -e ENABLE_BACKENDS=llama-cuda,ollama \
 #   -e INCLUDE_DEFAULT_MODELS=true \
 #   -e AUTOLOAD_MODELS=true \
 #   -e MODEL_PATH=/models \
 #   --restart unless-stopped \
 #   localai/localai:latest-aio-gpu-nvidia-cuda-12
 # echo "✅ LocalAI running at http://localhost:8080"
@ -456,6 +520,60 @@ EOF
  fi
 fi
 # Fresh-install path for LocalAI; skipped entirely when the script runs with --update.
 # Prompts before installing, writes a default models.yaml, and starts the
 # localai:custom image on the ai-stack-net network. The container is started only
 # inside the confirmed branch, so declining the prompt or an already-running
 # container never triggers a second `docker run --name localai`.
 if [[ "$1" != "--update" ]]; then
  if check_container_running "localai"; then
    echo "✅ Skipping LocalAI install (already running)."
  else
    # -r keeps backslashes in the reply literal.
    read -r -p "➕ Install LocalAI (GPU-accelerated)? [y/N]: " localai_prompt
    if [[ "$localai_prompt" =~ ^[Yy]$ ]]; then
      echo "🧠 Installing LocalAI (NVIDIA CUDA 12)..."
      mkdir -p ~/ai-stack/localai/models
      mkdir -p ~/ai-stack/localai/config
      echo "📄 Creating default models.yaml..."
      cat > ~/ai-stack/localai/config/models.yaml <<EOF
 - name: phi
  backend: llama-cuda
  parameters:
    model: /models/phi-2.gguf
 EOF
      # NOTE(review): assumes localai:custom has already been built locally (the
      # build step lives in the --update path) — confirm for first-time installs.
      echo "📦 Using custom built LocalAI image (localai:custom)"
      docker run -d \
        --name localai \
        --gpus all \
        --network ai-stack-net \
        -p 8080:8080 \
        -v ~/ai-stack/localai/models:/models \
        -v ~/ai-stack/localai/config:/config \
        -e ENABLE_BACKENDS=llama-cuda,whispercpp,stablediffusion,ollama \
        -e INCLUDE_DEFAULT_MODELS=true \
        -e AUTOLOAD_MODELS=true \
        -e MODEL_PATH=/models \
        --restart unless-stopped \
        localai:custom
      echo "🌐 LocalAI is now running at http://localhost:8080"
    fi
  fi
 fi